In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import cartopy.crs as ccrs
from datetime import datetime, timedelta
import matplotlib.gridspec as gridspec
from matplotlib import animation, rc
import matplotlib.colors as colors
from IPython.display import HTML
import collections
import seaborn as sns
from collections import OrderedDict
from sklearn.neighbors import KDTree, BallTree
from sklearn.metrics import pairwise
from pandas.tseries.offsets import MonthEnd
import shapefile
from shapely.geometry import shape, Point, Polygon
import scipy.stats as stats
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import pysal
import scipy.stats
from matplotlib.colors import LogNorm
from matplotlib.font_manager import FontProperties
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset
import cartopy.feature as feature
from zipfile import ZipFile
from lxml import html
import xml.etree.ElementTree as et
import h5py
In [3]:
output_path = '/Users/danielfisher/Dropbox/working_documents/papers/globalgasflaring/figures/revision_1'
output_path_ds = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/output'
In [4]:
def get_arcmin(x):
'''
rounds the decimal fraction of a degree to the nearest arc minute
and returns an integer encoding: degrees * 100 + arcminutes
'''
neg_values = x < 0
abs_x = np.abs(x)
floor_x = np.floor(abs_x)
decile = abs_x - floor_x
minute = np.around(decile * 60) # round to nearest arcmin
minute_fraction = minute*0.01 # convert to fractional value (ranges from 0 to 0.6)
max_minute = minute_fraction > 0.59
floor_x[neg_values] *= -1
floor_x[neg_values] -= minute_fraction[neg_values]
floor_x[~neg_values] += minute_fraction[~neg_values]
# deal with edge cases, and just round them all up
if np.sum(max_minute) > 0:
floor_x[max_minute] = np.around(floor_x[max_minute])
# now to get rid of rounding errors and allow comparison multiply by 100 and convert to int
floor_x = (floor_x * 100).astype(int)
return floor_x
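A quick illustration (example values chosen here, not taken from the datasets) of the encoding get_arcmin produces: the returned integer is degrees*100 plus arcminutes, with fractions just over the 59.5 arcmin boundary rounded up to the next whole degree.
In [ ]:
# illustrative only: 51.485 deg -> 51 deg 29', i.e. 5129; -1.99 deg -> -(1 deg 59'), i.e. -159;
# 0.9999 deg rounds up to the next whole degree, i.e. 100
print get_arcmin(np.array([51.485, -1.99, 0.9999]))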
Load in the hotspot dataframes for ATX and SLS:
In [5]:
hotspot_path_ats = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/processed/l3/all_sensors/v2/all_flares_atx_adaptive.csv'
hotspot_path_sls = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/processed/l3/all_sensors/v2/all_flares_sls_adaptive.csv'
ats_hotspot_df = pd.read_csv(hotspot_path_ats)
sls_hotspot_df = pd.read_csv(hotspot_path_sls)
ats_hotspot_df['sample_counts_flare'] = 1
sls_hotspot_df['sample_counts_flare'] = 1
In [730]:
ats_hotspot_df.columns
Out[730]:
In [6]:
# slstr radiance and frp adjustment
In [7]:
sls_hotspot_df.frp *= 1.12
sls_hotspot_df.swir_radiances *= 1.12
sls_hotspot_df.swir_radiances_22 *= 1.20
Load in the sampling dataframes for ATX and SLS:
In [8]:
# generate ATX sampling dataframe
path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/processed/l3/all_sensors/v2/'
df_names = ['all_sampling_at1_adaptive.csv', 'all_sampling_at2_adaptive.csv', 'all_sampling_ats_adaptive.csv']
df_list = []
for df in df_names:
df_list.append(pd.read_csv(os.path.join(path, df)))
ats_hotspot_sampling_df = pd.concat(df_list)
ats_hotspot_sampling_df['sample_counts_all'] = 1
In [9]:
#sampling_path_ats = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/processed/l3/all_sensors/all_sampling_ats.csv'
sampling_path_sls = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/processed/l3/all_sensors/v2/all_sampling_sls_adaptive.csv'
#ats_hotspot_sampling_df = pd.read_csv(sampling_path_ats)
sls_hotspot_sampling_df = pd.read_csv(sampling_path_sls)
#ats_hotspot_sampling_df['sample_counts_all'] = 1
sls_hotspot_sampling_df['sample_counts_all'] = 1
Load in the volcano dataframe:
In [10]:
volc_path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/volcanoes/volcanoes.csv'
volc_df = pd.read_csv(volc_path)
volc_df.dropna(inplace=True)
Generate the countries dataframe for the hotspot data. Each unique hotspot location is assigned a country based on the EEZ land boundaries shape file.
In [11]:
def extract_countries():
c_shp = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/borders/EEZ_land_union_v2_201410/EEZ_land_v2_201410.shp'
shape_file = shapefile.Reader(c_shp)
# iterate over the features of the shapefile
countries = {}
for feature in shape_file.shapeRecords():
country_name = feature.record[2]
country_shp = shape(feature.shape.__geo_interface__)
#eez_boundaries[eez_name] = eez_shp.simplify(0.2)
countries[country_name] = country_shp
return countries
def create_countries_dataframe(countries, df):
cols = ['lats_arcmin', 'lons_arcmin', 'lats', 'lons']
countries_df = df.drop_duplicates(['lats_arcmin', 'lons_arcmin'])[cols].copy()
country_list = []
for i, row in countries_df.iterrows():
country_found = False
point = Point(row.lons,row.lats)
for k in countries:
poly = countries[k]
if point.within(poly):
country_list.append(k)
country_found = True
break
if country_found:
continue
# if we get here no country has been found, so flag it
country_list.append('Without Country')
countries_df['countries'] = country_list
return countries_df
countries_dict = extract_countries()
ats_countries_df = create_countries_dataframe(countries_dict, ats_hotspot_df)
sls_countries_df = create_countries_dataframe(countries_dict, sls_hotspot_df)
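The point-in-polygon loop in create_countries_dataframe tests every unique hotspot location against every EEZ polygon, which can be slow for large shapefiles. A possible speed-up (a sketch only, not used above) is to prepare each country polygon once using shapely's prepared geometries:
In [ ]:
# optional sketch: prepared geometries speed up repeated point-in-polygon tests
from shapely.prepared import prep
prepared_countries = {name: prep(poly) for name, poly in countries_dict.items()}
# prepared_countries[name].contains(Point(lon, lat)) could then replace point.within(poly)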
Load in the BCM data from DMSP for comparison against ATSR dataset:
In [12]:
bcm_path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/dmsp/BCM_Global_20110223.xlsx'
cols=['Location', 'Year', 'MEAN BCM']
bcm_df_dict = pd.read_excel(bcm_path, sheetname=None)
bcm_df = pd.concat(bcm_df_dict).reset_index()[['Year', 'Location', 'MEAN BCM']]
bcm_df.loc[bcm_df.Location=='Russia_Combined', 'Location'] = 'Russia'
bcm_df.loc[bcm_df.Location=='USA_Combined', 'Location'] = 'USA'
bcm_df.rename(columns={'Location': 'countries', 'Year': 'year', 'MEAN BCM': 'bcm'}, inplace=True)
Load in the VIIRS hotspot data for Iraq for comparison against ATSR:
In [543]:
viirs_iraq_hotspot_path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/viirs_iraq_flares/hotspots_laads.xlsx'
viirs_iraq_hotspot_df = pd.read_excel(viirs_iraq_hotspot_path)
viirs_iraq_hotspot_df.rename(columns={'lat': 'lats', 'lon': 'lons'}, inplace=True)
Load in VIIRS hotspot data for bias assessment of SLSTR:
In [14]:
viirs_bias_hotspot_path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/slstr_bias_assessment'
viirs_got_hotspot_df = pd.read_csv(os.path.join(viirs_bias_hotspot_path, 'got.csv'))
viirs_got_hotspot_df.rename(columns={'lat': 'lats', 'lon': 'lons'}, inplace=True)
viirs_libya_hotspot_df = pd.read_csv(os.path.join(viirs_bias_hotspot_path, 'libya.csv'))
viirs_libya_hotspot_df.rename(columns={'lat': 'lats', 'lon': 'lons'}, inplace=True)
viirs_oman_hotspot_df = pd.read_csv(os.path.join(viirs_bias_hotspot_path, 'oman.csv'))
viirs_oman_hotspot_df.rename(columns={'lat': 'lats', 'lon': 'lons'}, inplace=True)
Load in the VIIRS industrial flaring activity for comparison against ATSR:
In [15]:
viirs_industrial_path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/viirs_industrial_sites'
kmz_fname = 'doc.kml'
doc = et.parse(os.path.join(viirs_industrial_path, kmz_fname))
In [16]:
nmsp = '{http://www.opengis.net/kml/2.2}'
df_list = []
for pm in doc.iterfind('.//{0}Placemark'.format(nmsp)):
name = (pm.find('{0}name'.format(nmsp)).text)
for ls in pm.iterfind('.//{0}coordinates'.format(nmsp)):
pos = np.fromstring(ls.text.strip().replace('\n',''), dtype=float, sep=', ')
lon = pos[0]
lat = pos[1]
df = pd.DataFrame([[lat,lon,name]], columns=['lats', 'lons', 'Name'])
df_list.append(df)
break
viirs_industrial_df = pd.concat(df_list)
viirs_industrial_df['lats_arcmin'] = get_arcmin(viirs_industrial_df.lats.values)
viirs_industrial_df['lons_arcmin'] = get_arcmin(viirs_industrial_df.lons.values)
id_dict = {'Nonmetal-mineral':1,
'Ferrous-metal': 2,
'Coal-processing': 3,
'Oil/gas': 4
}
viirs_industrial_df['name_id'] = viirs_industrial_df.Name.map(id_dict)
In [17]:
viirs_countries_df = create_countries_dataframe(countries_dict, viirs_industrial_df)
Load in NightFire data for comparison against SLSTR:
In [18]:
nightfire_path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/viirs_nightfire'
df_list = []
for f in os.listdir(nightfire_path):
df_list.append(pd.read_csv(os.path.join(nightfire_path, f)))
nightfire_df = pd.concat(df_list)
nightfire_df.rename(columns={'Lat_GMTCO': 'lats', 'Lon_GMTCO': 'lons'}, inplace=True)
nightfire_df['lats_arcmin'] = get_arcmin(nightfire_df.lats.values)
nightfire_df['lons_arcmin'] = get_arcmin(nightfire_df.lons.values)
In [19]:
nightfire_countries_df = create_countries_dataframe(countries_dict, nightfire_df)
In [20]:
def load_viirs_zenith():
path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/viirs_zenith/'
f = 'GMODO-SVM01-SVM03-SVM04-SVM05_npp_d20150705_t0254234_e0255476_b19092_c20171108213818198685_noaa_ops.h5'
ds = h5py.File(os.path.join(path, f))
vza = ds['All_Data']['VIIRS-MOD-GEO_All']['SatelliteZenithAngle'][:]
return vza[0,:]
viirs_zenith_angles = load_viirs_zenith()
Some of the ATX sensors have various issues during their operational lifetimes (see the notes on this at the bottom). It therefore makes sense to prioritise data from one sensor over another during any overlapping periods. Overlaps are removed from the persistent hotspot and sampling dataframes below before any further analysis is performed. The justifications for the sensor prioritisation in each overlapping period are given at the bottom of the notebook. In some instances poor sensor operational behaviour remains, but if no overlapping data are available we have to use what is given to us. The masking is only applied to the ATX data, not SLSTR:
In [21]:
# persistent hotspot df
mask = (~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 1995)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2003)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 1)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 2)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 3)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 4)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 5)) &
~((ats_hotspot_df.sensor == 'at1' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 7)) &
~((ats_hotspot_df.sensor == 'at1' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 8)) &
~((ats_hotspot_df.sensor == 'at1' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 9)) &
~((ats_hotspot_df.sensor == 'at1' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 10)) &
~((ats_hotspot_df.sensor == 'at1' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 11)) &
~((ats_hotspot_df.sensor == 'at1' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 12)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 4)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 5)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 6)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 7)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 8)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 9)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 10)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 11)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 12))
)
ats_hotspot_df = ats_hotspot_df[mask]
In [22]:
print ats_hotspot_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
In [23]:
# all sampling dataframe
mask = (~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 1995)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2003)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 1)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 2)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 3)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 4)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 5)) &
~((ats_hotspot_sampling_df.sensor == 'at1' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 7)) &
~((ats_hotspot_sampling_df.sensor == 'at1' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 8)) &
~((ats_hotspot_sampling_df.sensor == 'at1' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 9)) &
~((ats_hotspot_sampling_df.sensor == 'at1' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 10)) &
~((ats_hotspot_sampling_df.sensor == 'at1' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 11)) &
~((ats_hotspot_sampling_df.sensor == 'at1' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 12)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 4)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 5)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 6)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 7)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 8)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 9)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 10)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 11)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 12))
)
ats_hotspot_sampling_df = ats_hotspot_sampling_df[mask]
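The same exclusion rules are applied to both dataframes above. Purely as a sketch (not used here), the mask could also be built from a single list of (sensor, year, months) rules, which keeps the two cells in sync:
In [ ]:
# sketch only: build the overlap-exclusion mask from a rule list
# (months=None means exclude the whole year); mirrors the exclusions applied above
exclusion_rules = [('at2', 1995, None),
                   ('at2', 2003, None),
                   ('at2', 1996, range(1, 6)),
                   ('at1', 1996, range(7, 13)),
                   ('at2', 2002, range(4, 13))]
def build_exclusion_mask(df, rules):
    keep = np.ones(df.shape[0], dtype=bool)
    for sensor, year, months in rules:
        drop = (df.sensor == sensor) & (df.year == year)
        if months is not None:
            drop &= df.month.isin(months)
        keep &= ~drop.values
    return keep
# e.g. ats_hotspot_df = ats_hotspot_df[build_exclusion_mask(ats_hotspot_df, exclusion_rules)]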
In [688]:
print ats_hotspot_sampling_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
In [24]:
sls_hotspot_df_0317 = sls_hotspot_df[(sls_hotspot_df.year == 2017) & (sls_hotspot_df.month == 3)]
mask = ((sls_hotspot_df.year != 2016) & ~((sls_hotspot_df.year == 2017) &
(sls_hotspot_df.month <= 4)))
sls_hotspot_df = sls_hotspot_df[mask]
In [25]:
print sls_hotspot_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
In [26]:
mask = ((sls_hotspot_sampling_df.year != 2016) & ~((sls_hotspot_sampling_df.year == 2017) &
(sls_hotspot_sampling_df.month <= 4)))
sls_hotspot_sampling_df = sls_hotspot_sampling_df[mask]
Various approaches are applied in this section to discriminate between flaring and non-flaring persistent hotspots. First the spectral ratio method is applied to the ATX and SLS datasets. In the case of ATX, all data with invalid backgrounds, saturated MWIR radiances (already NaN), or observed by AT1 have the MWIR radiance value set to NaN. The ratio is then computed between the SWIR and MWIR spectral radiances (with NaNs preserved through the ratio). For each thermal anomaly grid cell the median of all ratio values is computed across the entire time series; if this median falls between the assumed flaring thresholds the grid cell is classed as a flare.
In the case of SLSTR, no data are excluded prior to the calculation of the median, and no upper threshold is used.
Any AT1 data with associated AT2 or ATS data in its grid cell now has a flaring activity classification from the application of the spectral ratio method. However, some grid cells are only associated with AT1 flares and therefore have no valid classification. In such instances, the AT1 data is assigned a classification by majority vote over all grid cells within 1 degree across the entire time series, working on the assumption that flaring activity is clustered over certain spatial scales.
In [27]:
ats_ratio_df = ats_hotspot_df[['mwir_bg', 'sensor', 'swir_radiances', 'mwir_radiances',
'lats_arcmin', 'lons_arcmin', 'lats', 'lons', 'sample_counts_flare']].copy()
ats_ratio_df['mwir_saturated_counts'] = ats_ratio_df.mwir_radiances.isnull()
ats_ratio_df['mwir_no_bg_counts'] = ats_ratio_df.mwir_bg == -1
ats_ratio_df.mwir_bg.loc[ats_ratio_df.mwir_bg == -1] = np.nan # invalid background
ats_ratio_df.mwir_bg.loc[ats_ratio_df.sensor == 'at1'] = np.nan # no MWIR from AT1
eps= 0.001
ratio = ats_ratio_df.swir_radiances.values / \
((ats_ratio_df.mwir_radiances.values - ats_ratio_df.mwir_bg.values)+ eps)
ats_ratio_df['ratio'] = ratio
to_group = ['lats_arcmin', 'lons_arcmin']
to_agg = {'ratio': np.nanmedian,
'lats': np.mean,
'lons': np.mean,
'sample_counts_flare': np.sum,
'mwir_saturated_counts': np.sum,
'mwir_no_bg_counts': np.sum}
ats_ratio_df = ats_ratio_df.groupby(to_group, as_index=False).agg(to_agg)
ats_ratio_df['is_flare'] = (ats_ratio_df.ratio >= 1.61) & (ats_ratio_df.ratio < 8.35)
In [28]:
flaring_map = ats_ratio_df[ats_ratio_df.is_flare.astype('bool')][['lats_arcmin', 'lons_arcmin']]
non_flaring_map = ats_ratio_df[~ats_ratio_df.is_flare.astype('bool') & ats_ratio_df.ratio.notnull()][['lats_arcmin', 'lons_arcmin']]
null_map = ats_ratio_df[~ats_ratio_df.is_flare.astype('bool') & ats_ratio_df.ratio.isnull()][['lats_arcmin', 'lons_arcmin']]
In [29]:
for l, m in zip(['flares', 'not_flares', 'null'], [flaring_map, non_flaring_map, null_map]):
sub_df = ats_hotspot_df.merge(m, on=['lats_arcmin', 'lons_arcmin'])
print 'AT1', l, sub_df[sub_df.sensor == 'at1'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print 'AT2', l, sub_df[sub_df.sensor == 'at2'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print 'ATS', l, sub_df[sub_df.sensor == 'ats'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print
In [30]:
sls_ratio_df = sls_hotspot_df[['sensor', 'swir_radiances', 'swir_radiances_22',
'lats_arcmin', 'lons_arcmin', 'lats', 'lons', 'sample_counts_flare']].copy()
eps= 0.001
ratio = sls_ratio_df.swir_radiances.values / (sls_ratio_df.swir_radiances_22 + eps)
sls_ratio_df['ratio'] = ratio
# here taking the median behaviour of the gas flare over all of its samples
# (note: possible bad data in the SLSTR record may affect this step)
to_group = ['lats_arcmin', 'lons_arcmin']
to_agg = {'ratio': np.nanmedian,
'lats': np.mean,
'lons': np.mean,
'sample_counts_flare': np.sum}
sls_ratio_df = sls_ratio_df.groupby(to_group, as_index=False).agg(to_agg)
# insert is_flare variable
sls_ratio_df['is_flare'] = (sls_ratio_df.ratio >= 0.94) & (sls_ratio_df.ratio < 1.92)
In [31]:
print 'SLS is flare', sls_ratio_df['is_flare'].sum()
print 'SLS not flare', (~sls_ratio_df['is_flare']).sum()
In [32]:
plt.close('all')
def planck_radiance(wvl, temp):
'''
wvl: wavelength (microns)
temp: temperature (kelvin)
'''
c1 = 1.19e-16 # first radiation constant (radiance form), W m2 sr-1
c2 = 1.44e-2 # second radiation constant, m K
wt = (wvl*1.e-6) * temp # m K
d = (wvl*1.e-6)**5 * (np.exp(c2/wt)-1)
return c1 / d * 1.e-6 # W m-2 sr-1 um-1
# set up fig
fig, (ax0, ax1, ax2) = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))
# spectral ratio plot
flare_size = 10000 # in sq. m.
pixel_size = 1e6 # in sq. m.
flare_area_pc = flare_size / pixel_size # unitless
temps = np.arange(0,3501, 1) # in K
spect_rad_swir = flare_area_pc * planck_radiance(1.6, temps)
spect_rad_swir_2 = flare_area_pc * planck_radiance(2.2, temps)
spect_rad_mwir = flare_area_pc * planck_radiance(3.7, temps)
spect_rad_lwir = flare_area_pc * planck_radiance(11, temps)
ratio_mwir = spect_rad_swir / spect_rad_mwir
ratio_swir = spect_rad_swir / spect_rad_swir_2
print 'MWIR ratio 1400K', ratio_mwir[1400]
print 'MWIR ratio 2800K', ratio_mwir[2800]
print 'SWIR ratio 1400K', ratio_swir[1400]
print 'SWIR ratio 2800K', ratio_swir[2800]
ax0.plot(temps, ratio_mwir, "k--", linewidth=1, label="$L_{1.6} / L_{3.7}$")
ax0.plot([-1,1400],[ratio_mwir[1400], ratio_mwir[1400]],'r--',linewidth=1)
ax0.plot([-1,2800],[ratio_mwir[2800], ratio_mwir[2800]],'r--',linewidth=1)
ax0.set_ylim([-0.25,10])
ax0.set_xlim([-0.25,3100])
ax0.plot(temps, ratio_swir, "k-", linewidth=1, label="$L_{1.6} / L_{2.2}$")
#ax0.plot([1400,3501],[ratio_swir[1400], ratio_swir[1400]],'r-',linewidth=1)
#ax0.plot([2800,3501],[ratio_swir[2800], ratio_swir[2800]],'r-',linewidth=1)
ax0.plot([-1,1400],[ratio_swir[1400], ratio_swir[1400]],'r-',linewidth=1)
ax0.plot([-1,2800],[ratio_swir[2800], ratio_swir[2800]],'r-',linewidth=1)
ax0.legend(prop={'size': 15})
ax0.set_xlabel("Temperature (K)", fontsize=16)
ax0.set_ylabel("Spectral Ratio", fontsize=16)
# histogram plots
ax1.hist(ats_ratio_df.ratio[(ats_ratio_df.ratio > 0) & (ats_ratio_df.ratio < 20)],
bins=100, normed=True, histtype='step', color='k')
ax1.plot([1.6,1.61], [0,0.4], 'r--', linewidth=2)
ax1.plot([8.34,8.34], [0,0.4], 'r--', linewidth=2)
ax1.set_ylim([0, 0.3])
ax1.set_xlabel('ATSR Spectral Ratio', fontsize=16)
ax1.set_ylabel('Normalised Frequency', fontsize=14)
ax2.hist(sls_ratio_df.ratio[(sls_ratio_df.ratio > 0) & (sls_ratio_df.ratio < 5)],
bins=100, normed=True, histtype='step', color='k')
ax2.plot([0.85,0.85], [0,1.5], 'r-', linewidth=2)
ax2.plot([1.92,1.92], [0,1.5], 'r-', linewidth=2)
ax2.set_xlabel('SLSTR Spectral Ratio', fontsize=16)
ax2.set_ylabel('Normalised Frequency', fontsize=14)
ax2.set_ylim([0, 1.4])
ax0.text(0.91, 0.05, '(a)', transform=ax0.transAxes, fontsize=14)
ax1.text(0.91, 0.05, '(b)', transform=ax1.transAxes, fontsize=14)
ax2.text(0.91, 0.05, '(c)', transform=ax2.transAxes, fontsize=14)
plt.savefig(os.path.join(output_path, 'ratio_plot.png'), bbox_inches='tight', dpi=600)
plt.show()
Assess the ratio distribution at the level of the sample (the above was done at the level of the flare). With this we can see how much sampling there is in the region of overlap in the above figure.
In [33]:
ats_sample_level_ratio = ats_hotspot_df.merge(ats_ratio_df, on=['lats_arcmin', 'lons_arcmin'])
sls_sample_level_ratio = sls_hotspot_df.merge(sls_ratio_df, on=['lats_arcmin', 'lons_arcmin'])
In [34]:
plt.close('all')
# set up fig
fig, (ax0, ax1) = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
# histogram plots
ax0.hist(ats_sample_level_ratio.ratio[(ats_sample_level_ratio.ratio > 0) & (ats_sample_level_ratio.ratio < 20)],
bins=100, normed=False, histtype='step', color='k')
ax0.plot([1.6,1.61], [0,300000], 'r--', linewidth=2)
ax0.plot([8.34,8.34], [0,300000], 'r--', linewidth=2)
ax0.set_ylim([0, 220000])
ax0.set_xlabel('ATSR Spectral Ratio', fontsize=16)
ax0.set_ylabel('Frequency', fontsize=14)
ax1.hist(sls_sample_level_ratio.ratio[(sls_sample_level_ratio.ratio > 0) & (sls_sample_level_ratio.ratio < 5)],
bins=100, normed=False, histtype='step', color='k')
ax1.plot([0.85,0.85], [0,15500], 'r-', linewidth=2)
ax1.plot([1.92,1.92], [0,15500], 'r-', linewidth=2)
ax1.set_xlabel('SLSTR Spectral Ratio', fontsize=16)
ax1.set_ylabel('Frequency', fontsize=14)
ax1.set_ylim([0, 15500])
ax0.text(0.91, 0.05, '(a)', transform=ax0.transAxes, fontsize=14)
ax1.text(0.91, 0.05, '(b)', transform=ax1.transAxes, fontsize=14)
plt.savefig(os.path.join(output_path, 'sample_level_ratio_plot.png'), bbox_inches='tight', dpi=600)
plt.show()
In some cases we wish to reassign points with null ratios to the flaring class: currently, if the ratio is invalid we assume the point is not a flare, which may not be the case. In all cases where the ratio is invalid, the point is assigned the classification of its neighbours.
For AT1, no persistent hotspots have ratios associated with them. Where ATS or AT2 data are available in a grid cell, and their ratios are valid, the grid cell can still be assigned a valid ratio. However, not all AT1 grid cells have ATS or AT2 data associated with them; in such cases the assignment is made from the nearest neighbours.
For AT2 and ATS, if a grid cell has no valid ratio values we also assign the classification from nearby grid cells. The causes of missing ratios for these sensors are either no suitable background radiance estimate or a saturated MWIR channel: less frequently sampled gas flares may lack a valid background, while larger, more extreme flares may saturate the MWIR channel. So we assign the class based on the local neighbourhood.
In [35]:
def reassign_flares(ratio_df, k=1000, d=1):
'''
k = number of neighbors to consider
d = max distance to consider for neighbours
'''
current_flares = ratio_df.is_flare.values
flare_locations = np.array(zip(np.deg2rad(ratio_df.lats.values),
np.deg2rad(ratio_df.lons.values)))
# compare the flare locations to the potential locations in the orbit
locations_balltree = BallTree(flare_locations, metric='haversine')
distances, indexes = locations_balltree.query(flare_locations, k=k)
# distances mask
self_mask = np.rad2deg(distances) > 1/60 # do not consider the point itself
masked_distances = (np.rad2deg(distances) < d) & self_mask # and not points more than d degrees away
# update is flare based on neighbours
is_flare = np.ones(ratio_df.ratio.shape)*999
for i in xrange(is_flare.size):
# get the classifications of the flares closest to the current flare
closest_flares = ratio_df.is_flare[indexes[i,:]]
# mask based on distances
masked_closest = closest_flares[masked_distances[i, :]]
if len(masked_closest) > 0:
# take the nanmean of valid values. We will have nans nearby
# due to invalid backgrounds and observations from AT1
mean_local_type = np.nanmean(masked_closest)
# if mean is > 0.5, then majority is flare, else majority is industrial
if mean_local_type >= 0.5:
is_flare[i] = 1
else:
is_flare[i] = 0
else:
is_flare[i] = current_flares[i]
# store the neighbourhood-based classification
ratio_df['updated_flare'] = is_flare
return ratio_df
In [36]:
ats_ratio_df = reassign_flares(ats_ratio_df, d=1)
In [37]:
mask = ats_ratio_df.ratio.isnull()
ats_ratio_df.is_flare.loc[mask] = ats_ratio_df.updated_flare.loc[mask]
In [38]:
flaring_map = ats_ratio_df[ats_ratio_df.is_flare.astype('bool')][['lats_arcmin', 'lons_arcmin']]
non_flaring_map = ats_ratio_df[~ats_ratio_df.is_flare.astype('bool')][['lats_arcmin', 'lons_arcmin']]
In [39]:
for l, m in zip(['flares', 'not_flares'], [flaring_map, non_flaring_map]):
sub_df = ats_hotspot_df.merge(m, on=['lats_arcmin', 'lons_arcmin'])
print 'AT1', l, sub_df[sub_df.sensor == 'at1'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print 'AT2', l, sub_df[sub_df.sensor == 'at2'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print 'ATS', l, sub_df[sub_df.sensor == 'ats'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print
As there is overlap between flaring and non-flaring thermal anomalies, further screening of the dataset is beneficial. This is particularly the case for volcanoes, which when detected can produce very large signals that are not representative of true flaring activity. Here we screen out any data points within ~10 km of a volcano that has been seen to be active at any point.
In [40]:
def dist_to_nearest_volcano(volc_df, ratio_df):
volc_lat_lon = np.dstack([np.deg2rad(volc_df.Latitude.values),
np.deg2rad(volc_df.Longitude.values)])[0]
volc_balltree = BallTree(volc_lat_lon, metric='haversine')
# get the unique flare lats and lons for assessment in kdtree
flare_locations = np.array(zip(np.deg2rad(ratio_df.lats.values),
np.deg2rad(ratio_df.lons.values)))
# compare the flare locations to the potential locations in the orbit
distances, indexes = volc_balltree.query(flare_locations, k=1)
# set up the dataframe to hold the distances
degree_distances = np.rad2deg(distances)
mask = degree_distances < 5.0/60 # if within ~10km of any volcano, then is volcano
ratio_df['is_volcano'] = mask
return ratio_df
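As a quick sanity check on the threshold used above (assuming a 6371 km mean Earth radius), 5 arcmin of great-circle distance is roughly the 10 km quoted:
In [ ]:
# 5 arcmin great-circle distance in km, assuming a 6371 km mean Earth radius
print np.deg2rad(5.0/60) * 6371.0  # ~9.3 km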
In [41]:
ats_ratio_df = dist_to_nearest_volcano(volc_df, ats_ratio_df)
sls_ratio_df = dist_to_nearest_volcano(volc_df, sls_ratio_df)
In [42]:
# we want to know how many more points are excluded by volcanoes, on top of the points already
# excluded by the ratio test (i.e. where the ratio is good)
ats_volcano_map = ats_ratio_df[ats_ratio_df.is_volcano.astype('bool') &
ats_ratio_df.is_flare][['lats_arcmin', 'lons_arcmin']]
sls_volcano_map = sls_ratio_df[sls_ratio_df.is_volcano.astype('bool') &
sls_ratio_df.is_flare][['lats_arcmin', 'lons_arcmin']]
In [43]:
ats_ratio_df.is_flare.loc[ats_ratio_df.is_volcano] = 0
sls_ratio_df.is_flare.loc[sls_ratio_df.is_volcano] = 0
In [44]:
sub_df = ats_hotspot_df.merge(ats_volcano_map, on=['lats_arcmin', 'lons_arcmin'])
print 'AT1', 'Volcanoes', sub_df[sub_df.sensor == 'at1'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print 'AT2', 'Volcanoes', sub_df[sub_df.sensor == 'at2'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print 'ATS', 'Volcanoes', sub_df[sub_df.sensor == 'ats'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print
sub_df = sls_hotspot_df.merge(sls_volcano_map, on=['lats_arcmin', 'lons_arcmin'])
print 'SLSTR', 'Volcanoes', sub_df[sub_df.sensor == 'sls'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
In [45]:
def plot_data_screening(ratio_df, lab, fname, leg=True):
flare_df = ratio_df[ratio_df.is_flare.astype('bool')]
not_flare_df = ratio_df[~ratio_df.is_flare.astype('bool')]
volc_df = ratio_df[ratio_df.is_volcano]
plt.close('all')
fig = plt.figure(figsize=(15,10))
ax_list = []
ax_list.append(plt.subplot2grid((4, 5), (0, 0), colspan=5, rowspan=3, projection=ccrs.PlateCarree()))
ax_list.append(plt.subplot2grid((4, 5), (3, 0), projection=ccrs.PlateCarree()))
ax_list.append(plt.subplot2grid((4, 5), (3, 1), projection=ccrs.PlateCarree()))
ax_list.append(plt.subplot2grid((4, 5), (3, 2), projection=ccrs.PlateCarree()))
ax_list.append(plt.subplot2grid((4, 5), (3, 3), projection=ccrs.PlateCarree()))
ax_list.append(plt.subplot2grid((4, 5), (3, 4), projection=ccrs.PlateCarree()))
for ax in ax_list:
ax.set_xticks([])
ax.set_yticks([])
# set limits
xlims = [(-180, 180), (-105, -87), (4, 9), (46, 56), (65, 82), (106, 125)]
ylims = [(-90, 90), (25, 39), (3, 7), (23, 33), (55, 68), (33, 45)]
pos = [(-102, 40), (-2, -1), (39, 26), (83, 62), (113, 47)]
for ax, xlim, ylim in zip(ax_list, xlims, ylims):
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.coastlines()
# scale the values
flare_counts = flare_df['sample_counts_flare']
ind_counts = ratio_df['sample_counts_flare']
# make main plot
ax_list[0].text(0.94, 0.92, lab, transform=ax_list[0].transAxes, fontsize=25)
ax_list[0].plot(volc_df.lons, volc_df.lats, '.', color='gray', markersize=10, label='Volcanic')
scat = ax_list[0].scatter(flare_df.lons, flare_df.lats,
s=flare_counts/10, lw=0.2,
edgecolors='r',
facecolor='None',
label = 'Flare')
scat = ax_list[0].scatter(not_flare_df.lons, not_flare_df.lats,
s=ind_counts/10, lw=0.2,
edgecolors='b',
facecolor='None',
label = "Non-Flare")
leg0 = ax_list[0].legend(loc = 2, scatterpoints = 1, prop={'size': 13})
leg0.legendHandles[0]._sizes = [100]
leg0.legendHandles[1]._sizes = [100]
leg0.legendHandles[2]._sizes = [100]
leg0.get_frame().set_alpha(1)
if leg:
l1 = ax_list[0].scatter([-170],[-70], s=10/10, edgecolors='k', facecolor='None')
l2 = ax_list[0].scatter([-170],[-70], s=100/10, edgecolors='k', facecolor='None')
l3 = ax_list[0].scatter([-170],[-70], s=500/10, edgecolors='k', facecolor='None')
l4 = ax_list[0].scatter([-170],[-70], s=1000/10, edgecolors='k', facecolor='None')
labels = ["10", "100", "500", "1000"]
leg1 = ax_list[0].legend([l1, l2, l3, l4], labels, frameon=True, fontsize=14,
handlelength=2, loc = 3, borderpad = 1,
handletextpad=1, title='N. Samples', scatterpoints = 1, prop={'size': 15})
leg1.get_frame().set_alpha(1)
ax_list[0].add_artist(leg0)
# add roi boxes
for i, (p, x, y) in enumerate(zip(pos, xlims[1:], ylims[1:])):
ax_list[0].plot([x[0], x[0], x[1], x[1], x[0]],
[y[0], y[1], y[1], y[0], y[0]], 'k-')
ax_list[0].text(p[0], p[1], str(i+1)+'.', fontsize=12)
# make alt plots
for i, ax in enumerate(ax_list[1:]):
scat = ax.scatter(flare_df.lons, flare_df.lats,
s=flare_counts/10, lw=0.2,
edgecolors='r',
facecolor='None')
scat = ax.scatter(not_flare_df.lons, not_flare_df.lats,
s=ind_counts/10, lw=0.2,
edgecolors='b',
facecolor='None')
ax.text(0.1, 0.1, str(i+1)+'.', transform=ax.transAxes, fontsize=15)
plt.savefig(os.path.join(output_path, fname), bbox_inches='tight', dpi=600)
plt.show()
In [895]:
from sklearn.neighbors.kde import KernelDensity
from matplotlib.colors import BoundaryNorm
def estimate_density(df):
lat_vals = df.lats.values
lon_vals = df.lons.values
counts = df.sample_counts_flare.values
# removed the code to calculate total counts in
# the data; now we are just getting the density of the
# total number of sites
# long_lat_list = []
# long_lon_list = []
# for lat, lon, c in zip(lat_vals, lon_vals, counts):
# long_lat_list.extend([lat] * c)
# long_lon_list.extend([lon] * c)
# latlon = np.vstack([long_lat_list, long_lon_list]).T
latlon = np.vstack([lat_vals, lon_vals]).T
x = np.arange(-180, 181, 1)
y = np.arange(-90, 91, 1)
X, Y = np.meshgrid(x, y)
xy = np.radians(np.vstack([Y.ravel(), X.ravel()]).T)
# construct a spherical kernel density estimate of the distribution
kde = KernelDensity(bandwidth=0.005, metric='haversine',
kernel='tophat', algorithm='ball_tree')
kde.fit(np.radians(latlon))
Z = np.exp(kde.score_samples(xy))
Z = Z.reshape(X.shape)
Z = np.ma.masked_array(Z, Z == 0)  # mask cells with no density
return X, Y, Z
def hist_count(df):
bin_x = np.arange(-180, 181, 1)
bin_y = np.arange(-90, 91, 1)
meshed_x, meshed_y = np.meshgrid(bin_x, bin_y)
meshed_x = meshed_x[:-1,:-1] + 0.5
meshed_y = meshed_y[:-1,:-1] + 0.5
binned_data = stats.binned_statistic_2d(df.lons, df.lats,
np.ones(df.lons.size), 'sum',
bins=[bin_x, bin_y]).statistic.T
mask = binned_data == 0
Z = np.ma.masked_array(binned_data, mask)
return meshed_x, meshed_y, Z
def plot_data_screening_v3(df1, df2, df3):
plt.close('all')
flare_df1 = df1[df1.is_flare.astype('bool')]
not_flare_df1 = df1[~df1.is_flare.astype('bool')]
flare_df2 = df2[df2.is_flare.astype('bool')]
not_flare_df2 = df2[~df2.is_flare.astype('bool')]
flare_df3 = df3[df3.is_flare.astype('bool')]
not_flare_df3 = df3[~df3.is_flare.astype('bool')]
XT1, YT1, ZT1 = hist_count(flare_df1)
XF1, YF1, ZF1 = hist_count(not_flare_df1)
# mask each binned array so only dominant class is showing
mask = ZT1 > ZF1
ZT1_mask = np.ma.masked_array(ZT1, ~mask)
mask = ZF1 > ZT1
ZF1_mask = np.ma.masked_array(ZF1, ~mask)
XT2, YT2, ZT2 = hist_count(flare_df2)
XF2, YF2, ZF2 = hist_count(not_flare_df2)
# mask each binned array so only dominant class is showing
mask = ZT2 > ZF2
ZT2_mask = np.ma.masked_array(ZT2, ~mask)
mask = ZF2 > ZT2
ZF2_mask = np.ma.masked_array(ZF2, ~mask)
XT3, YT3, ZT3 = hist_count(flare_df3)
XF3, YF3, ZF3 = hist_count(not_flare_df3)
# mask each binned array so only dominant class is showing
mask = ZT3 > ZF3
ZT3_mask = np.ma.masked_array(ZT3, ~mask)
mask = ZF3 > ZT3
ZF3_mask = np.ma.masked_array(ZF3, ~mask)
# set up figure
plt.close('all')
fig = plt.figure(figsize=(13,20))
fig.subplots_adjust(hspace=0.001, wspace=0.1)
ax1 = plt.subplot(3, 1, 1, projection=ccrs.EqualEarth())
ax2 = plt.subplot(3, 1, 2, projection=ccrs.EqualEarth())
ax3 = plt.subplot(3, 1, 3, projection=ccrs.EqualEarth())
ax_list = [ax1, ax2, ax3]
for i, ax in enumerate(ax_list):
ax.set_global()
ax.outline_patch.set_edgecolor('black')
ax.coastlines(color='black', linewidth=0.5)
ax.gridlines(color='black', linewidth=0.5, alpha=0.5)
ax1.text(0.94, 0.92, '(a)', transform=ax1.transAxes, fontsize=25)
ax2.text(0.94, 0.92, '(b)', transform=ax2.transAxes, fontsize=25)
ax3.text(0.94, 0.92, '(c)', transform=ax3.transAxes, fontsize=25)
img_extent = (-180, 180, -90, 90)
im1T = ax1.imshow(ZT1_mask, origin='lower', cmap='Reds', norm=LogNorm(vmin=1, vmax=500),
extent=img_extent, transform=ccrs.PlateCarree())
im1F = ax1.imshow(ZF1_mask, origin='lower', cmap='Blues', norm=LogNorm(vmin=1, vmax=100),
extent=img_extent, transform=ccrs.PlateCarree())
im2T = ax2.imshow(ZT2_mask, origin='lower', cmap='Reds',norm=LogNorm(vmin=1, vmax=500),
extent=img_extent, transform=ccrs.PlateCarree())
im2F = ax2.imshow(ZF2_mask, origin='lower', cmap='Blues', norm=LogNorm(vmin=1, vmax=100),
extent=img_extent, transform=ccrs.PlateCarree())
im3T = ax3.imshow(ZT3_mask, origin='lower', cmap='Reds', norm=LogNorm(vmin=1, vmax=500),
extent=img_extent, transform=ccrs.PlateCarree())
im3F = ax3.imshow(ZF3_mask, origin='lower', cmap='Blues', norm=LogNorm(vmin=1, vmax=100),
extent=img_extent, transform=ccrs.PlateCarree())
cbaxes1 = fig.add_axes([0.125, 0.1, 0.35, 0.01])
cb1 = plt.colorbar(im1T, cax=cbaxes1, orientation="horizontal")
cb1.ax.tick_params(labelsize=12)
cb1.set_label('Flare Anomaly Count (Log)', fontsize=16)
cbaxes2 = fig.add_axes([0.55, 0.1, 0.35, 0.01])
cb2 = plt.colorbar(im1F, cax = cbaxes2, orientation="horizontal")
cb2.ax.tick_params(labelsize=12)
cb2.set_label('Non-Flare Anomaly Count (Log)', fontsize=16)
fname = 'density_plots.png'
plt.savefig(os.path.join(output_path, fname), bbox_inches='tight', dpi=600)
plt.show()
In [562]:
ats_flaring_map = ats_ratio_df[ats_ratio_df.is_flare.astype('bool')][['lats_arcmin', 'lons_arcmin']]
sls_flaring_map = sls_ratio_df[sls_ratio_df.is_flare.astype('bool')][['lats_arcmin', 'lons_arcmin']]
In [563]:
ats_flare_sampling_df = ats_hotspot_sampling_df.merge(ats_flaring_map, on=['lats_arcmin', 'lons_arcmin'])
sls_flare_sampling_df = sls_hotspot_sampling_df.merge(sls_flaring_map, on=['lats_arcmin', 'lons_arcmin'])
In [564]:
ats_flare_df = ats_hotspot_df.merge(ats_flaring_map, on=['lats_arcmin', 'lons_arcmin'])
sls_flare_df = sls_hotspot_df.merge(sls_flaring_map, on=['lats_arcmin', 'lons_arcmin'])
In [565]:
# set up the slstr annum dataset used in some of the analyses
sls_flare_df_annum = sls_flare_df[((sls_flare_df.year == 2017) & (sls_flare_df.month >= 5)) | (sls_flare_df.year == 2018)]
sls_flare_sampling_df_annum = sls_flare_sampling_df[((sls_flare_sampling_df.year == 2017) & (sls_flare_sampling_df.month >= 5)) | (sls_flare_sampling_df.year == 2018)]
There is irregular behaviour in the AT1 hotspot data in the Northern Hemisphere in some years - for evidence of this see the gif below. During 1994 and 1995 there are odd high FRP detections over much of the Northern Hemisphere, leading to erroneously high FRPs being reported for the US, UK, Canada and Russia. These values need to be excluded. Examining the statistics of the dataset below, it is clear that the tails of the AT1 statistical distributions reach much higher values than those of ATS. Looking at the maximum mean value for any individual ATS gas flare, it is around 80 MW, whilst for AT1 these maximum mean values go up to 670 MW. Looking at the distribution of the standard deviation, the AT1 and ATS data have similar ranges, but a larger proportion of the AT1 data is located in the tail. For ATS, over ???% of the data has standard deviation values less than ???, and using this information we can threshold the AT1 data so that the flaring activity is consistent with that seen by ATS. We can also apply the same approach to AT2 to remove any flaring activity that is not consistent with ATS.
By setting an upper limit on the standard deviation of ???, which comprises ???% of the ATS data, and an upper limit of 80 MW on the mean, which contains 100% of the AT2 and ATS data, we can exclude the AT1 thermal anomalies whose behaviour is inconsistent with that seen in AT2 and ATS (a sketch of how these percentages can be checked is given after the per-flare statistics are computed below).
In [566]:
print ats_flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print ats_flare_df[ats_flare_df.sensor == 'ats'].drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print ats_flare_df[ats_flare_df.sensor == 'at2'].drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print ats_flare_df[ats_flare_df.sensor == 'at1'].drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print sls_flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
In [582]:
def errors_plot(at1_stats_df, at2_stats_df, ats_stats_df, sls_stats_df):
fig = plt.figure(figsize=(18, 7))
ax0 = plt.subplot(121)
ax1 = plt.subplot(122)
labels = ['SLSTR Mean', 'AATSR Mean', 'ATSR-2 Mean', 'ATSR-1 Mean',
'SLSTR SD', 'AATSR SD', 'ATSR-2 SD', 'ATSR-1 SD']
no_labels = ['','','','','','','','']
data = [sls_stats_df.frp_mean, ats_stats_df.frp_mean, at2_stats_df.frp_mean, at1_stats_df.frp_mean,
sls_stats_df.frp_sd[sls_stats_df.frp_sd.notnull()], ats_stats_df.frp_sd[ats_stats_df.frp_sd.notnull()],
at2_stats_df.frp_sd[at2_stats_df.frp_sd.notnull()], at1_stats_df.frp_sd[at1_stats_df.frp_sd.notnull()]]
flierprops = dict(marker='o', markerfacecolor='red', markersize=5,
linestyle='none')
#plt.yscale('log')
ax0.set_xlim([1e-4, 1e3])
ax0.set_xscale("log", nonposx='clip')
ax0.boxplot(data, vert=False, flierprops=flierprops, labels=labels, whis=[0.05, 99.95])
ax0.set_xlabel('Log FRP (MW)',fontsize=20)
ax0.tick_params(axis='both', which='major', labelsize=14)
# values from ATS data, Punta de Mata flare in Venezuela
max_mean = 70.704
max_sd = 93.158
at1_mask = (at1_stats_df.frp_mean < max_mean) & (at1_stats_df.frp_sd < max_sd)
at2_mask = (at2_stats_df.frp_mean < max_mean) & (at2_stats_df.frp_sd < max_sd)
ats_mask = (ats_stats_df.frp_mean < max_mean) & (ats_stats_df.frp_sd < max_sd)
sls_mask = (sls_stats_df.frp_mean < max_mean) & (sls_stats_df.frp_sd < max_sd)
data = [sls_stats_df.frp_mean[sls_mask], ats_stats_df.frp_mean[ats_mask], at2_stats_df.frp_mean[at2_mask], at1_stats_df.frp_mean[at1_mask],
sls_stats_df.frp_sd[sls_mask], ats_stats_df.frp_sd[ats_mask], at2_stats_df.frp_sd[at2_mask], at1_stats_df.frp_sd[at1_mask]]
flierprops = dict(marker='o', markerfacecolor='red', markersize=5,
linestyle='none')
ax1.set_xlim([1e-4, 1e3])
ax1.set_xscale("log", nonposx='clip')
ax1.boxplot(data, vert=False, flierprops=flierprops, labels=no_labels, whis=[0.05, 99.95])
#ax1.tick_params(labelleft='off')
ax1.set_xlabel('Log FRP (MW)', fontsize=20)
ax1.tick_params(axis='both', which='major', labelsize=14)
ax0.text(0.05, 0.05, '(a)', transform=ax0.transAxes, fontsize=18)
ax1.text(0.05, 0.05, '(b)', transform=ax1.transAxes, fontsize=18)
plt.savefig(os.path.join(output_path, 'errors_plot.png'), bbox_inches='tight', dpi=600)
plt.show()
In [568]:
def statistical_screening_plot(df, x, y, inner=True):
plt.close('all')
fig, ax1 = plt.subplots()
h = ax1.hist2d(df[x], df[y], bins=50, cmin=1, norm=colors.LogNorm(), vmin=1, vmax=1000)
cbar = plt.colorbar(h[3], ax=ax1)
if inner:
# These are in unitless percentages of the figure size. (0,0 is bottom left)
left, bottom, width, height = [0.4, 0.4, 0.4, 0.4]
sub_df = df[df[y] < 10]
ax2 = fig.add_axes([left, bottom, width, height])
ax2.hist2d(sub_df[x], sub_df[y], bins=20, cmin=1, norm=colors.LogNorm())
ax1.set_ylabel(y)
ax1.set_xlabel(x)
plt.show()
In [569]:
def generate_stats_df(flare_df):
cols = ['lats_arcmin', 'lons_arcmin', 'lats', 'lons', 'frp', 'year', 'sample_counts_flare']
stats_df = flare_df[cols].copy()
stats_df['frp_sd'] = stats_df['frp']
stats_df['frp_mean'] = stats_df['frp']
stats_df['frp_max'] = stats_df['frp']
stats_df['frp_min'] = stats_df['frp']
stats_df['frp_median'] = stats_df['frp']
stats_df['frp_sum'] = stats_df['frp']
to_group = ['lats_arcmin', 'lons_arcmin']
to_agg = {'frp_sd': np.std,
'frp_mean': np.mean,
'frp_min': np.min,
'frp_max': np.max,
'frp_median': np.median,
'sample_counts_flare': np.sum,
'frp_sum': np.sum,
'lats': np.mean,
'lons': np.mean,
}
stats_df = stats_df.groupby(to_group, as_index=False).agg(to_agg)
return stats_df
In [570]:
at1_stats_df = generate_stats_df(ats_flare_df[ats_flare_df.sensor == 'at1'])
at2_stats_df = generate_stats_df(ats_flare_df[ats_flare_df.sensor == 'at2'])
ats_stats_df = generate_stats_df(ats_flare_df[ats_flare_df.sensor == 'ats'])
sls_stats_df = generate_stats_df(sls_flare_df[sls_flare_df.sensor == 'sls'])
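As a cross-check on the screening limits discussed above (a sketch only; the 70.704 MW mean and 93.158 MW standard deviation limits are the ATS-derived values used in the screening cell further below), the fraction of each sensor's flares retained by those limits can be computed directly from the per-flare stats dataframes:
In [ ]:
# sketch: fraction of flares in each per-sensor stats dataframe falling below
# the ATS-derived screening limits (mean FRP and FRP standard deviation)
max_mean = 70.704  # MW
max_sd = 93.158    # MW
for name, sdf in zip(['at1', 'at2', 'ats', 'sls'],
                     [at1_stats_df, at2_stats_df, ats_stats_df, sls_stats_df]):
    retained = ((sdf.frp_mean < max_mean) & (sdf.frp_sd < max_sd)).mean() * 100
    print name, 'retained (%):', round(retained, 1)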
In [583]:
plt.close('all')
errors_plot(at1_stats_df, at2_stats_df, ats_stats_df, sls_stats_df)
In [318]:
# set max values from the Punta de Mata flare, Venezuela
max_mean = 70.705
max_sd = 93.16
# do ats
aats_flare_df = ats_flare_df[ats_flare_df.sensor == 'ats']
aats_flare_df = aats_flare_df.merge(ats_stats_df[['frp_mean', 'frp_sd', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
s1 = aats_flare_df.shape[0]
print s1
excluded_df = aats_flare_df[~((aats_flare_df.frp_mean < max_mean) & (aats_flare_df.frp_sd < max_sd))]
print 'total excluded ATS:', excluded_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
aats_flare_df = aats_flare_df[(aats_flare_df.frp_mean < max_mean) &
(aats_flare_df.frp_sd < max_sd)]
aats_flare_df.drop(['frp_mean', 'frp_sd'], axis=1, inplace=True)
s2 = aats_flare_df.shape[0]
print s2
print s2 * 1.0 / s1 * 100
print
# do at2
at2_flare_df = ats_flare_df[ats_flare_df.sensor == 'at2']
at2_flare_df = at2_flare_df.merge(at2_stats_df[['frp_mean', 'frp_sd', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
s1 = at2_flare_df.shape[0]
print s1
excluded_df = at2_flare_df[~((at2_flare_df.frp_mean < max_mean) & (at2_flare_df.frp_sd < max_sd))]
print 'total excluded AT2:', excluded_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
at2_flare_df = at2_flare_df[(at2_flare_df.frp_mean < max_mean) &
(at2_flare_df.frp_sd < max_sd)]
at2_flare_df.drop(['frp_mean', 'frp_sd'], axis=1, inplace=True)
s2 = at2_flare_df.shape[0]
print s2
print s2 * 1.0 / s1 * 100
print
# do at1
at1_flare_df = ats_flare_df[ats_flare_df.sensor == 'at1']
at1_flare_df = at1_flare_df.merge(at1_stats_df[['frp_mean', 'frp_sd', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
s1 = at1_flare_df.shape[0]
print s1
excluded_df = at1_flare_df[~((at1_flare_df.frp_mean < max_mean) & (at1_flare_df.frp_sd < max_sd))]
print 'total excluded AT1:', excluded_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
at1_flare_df = at1_flare_df[(at1_flare_df.frp_mean < max_mean) &
(at1_flare_df.frp_sd < max_sd)]
at1_flare_df.drop(['frp_mean', 'frp_sd'], axis=1, inplace=True)
s2 = at1_flare_df.shape[0]
print s2
print s2 * 1.0 / s1 * 100
print
# join back together
ats_flare_df = pd.concat([aats_flare_df, at2_flare_df, at1_flare_df], ignore_index=True)
Let's check the flare counts again to see if the strange AT1 data has been removed:
In [319]:
print ats_flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print ats_flare_df[ats_flare_df.sensor == 'ats'].drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print ats_flare_df[ats_flare_df.sensor == 'at2'].drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print ats_flare_df[ats_flare_df.sensor == 'at1'].drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print sls_flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
In [743]:
ats_flare_df.frp.describe()
Out[743]:
In [742]:
ats_flare_df[ats_flare_df.frp == 0.349345900631]
Out[742]:
Now let's redo the sampling analysis to consider only the persistent hotspots associated with flaring; we do not care about the sampling of non-flare hotspots.
In [320]:
# reduce sampling dataframes to flaring hotspot locations only
ats_flare_sampling_df = ats_hotspot_sampling_df.merge(ats_flare_df[['lats_arcmin', 'lons_arcmin']].drop_duplicates(),
on=['lats_arcmin', 'lons_arcmin'])
In [321]:
sls_flare_sampling_df = sls_hotspot_sampling_df.merge(sls_flare_df[['lats_arcmin', 'lons_arcmin']].drop_duplicates(),
on=['lats_arcmin', 'lons_arcmin'])
Looking at the sampling dataframe first, it can be seen that the SLSTR data has too low a cloud cover for 2017; the cause is currently unknown. The same is found for AT1, and that can be corrected using the data from ATS and AT2, as all sensors use the same map, so the sampling is made across all sensors at the given locations. For SLSTR we cannot so easily use the ATX sampling to adjust, as the flaring map is different, so not all the points observed by SLSTR are available in the ATX maps. This suggests that we should use the SLSTR map to generate the sampling data from ATSR, as we could then use the cloud cover statistics from these ATSR data to update the data for SLSTR. As a workaround, we can instead use cloud statistics from the nearest ATX sample to update the cloud cover for SLSTR, which does need to be updated to get reliable estimates of flaring activity in terms of FRP.
In [322]:
global_sampling = sls_flare_sampling_df.groupby(['year', 'sensor'], as_index=False).agg({'sample_counts_all': np.sum,
'cloud_cover': np.mean})
global_sampling
Out[322]:
We repeat the analysis done above for the SLSTR data on the ATX data. It is apparent that the annual mean cloud cover for AT1 is too low: it should be around 60% and is typically far lower than this, owing to deficiencies in the cloud masking available on AT1. So we need to update the sampling cloud cover values for AT1 in an appropriate way so that they reflect typical cloud cover for a given feature. Also note that the total sample count in some years is too low (considering only full years of operation). This is indicative of poor sensor behaviour in those years, and must be mentioned in the paper.
In [323]:
# get the global annual sample counts and mean cloud cover
global_sampling = ats_flare_sampling_df.groupby(['year', 'sensor'], as_index=False).agg({'sample_counts_all': np.sum,
'cloud_cover': np.mean})
global_sampling
Out[323]:
To update the AT1 cloud cover percentages, the non-AT1 samples are first extracted into a new dataframe. These non-AT1 samples are then grouped by lat and lon, taking the mean cloud cover for each sample location. A new sensor column is added to the grouped data with the sensor assigned as AT1, and the cloud cover column is renamed updated_cloud_cover. The modified grouped dataframe is then merged back onto the main sampling dataframe by lat, lon and sensor. All AT1 rows now have an updated cloud cover value associated with them, and this value is used to overwrite the original cloud cover column. In effect, the cloud cover column in every AT1 row is assigned the updated cloud cover value. Lastly, the updated_cloud_cover column is dropped from the sampling dataframe as it is no longer needed.
In [324]:
def update_at1_cloud_cover(ats_sampling_df):
not_at1_sampling = ats_sampling_df[ats_sampling_df.sensor !='at1'].copy()
not_at1_sampling_mean_cc = not_at1_sampling.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'cloud_cover':np.mean})
not_at1_sampling_mean_cc['sensor'] = 'at1'
not_at1_sampling_mean_cc.rename(columns={'cloud_cover':'updated_cloud_cover'}, inplace=True)
ats_sampling_df = ats_sampling_df.merge(not_at1_sampling_mean_cc, on=['lats_arcmin', 'lons_arcmin', 'sensor'], how='outer')
print 'Number of at1 sampling locations not updated:', np.sum((ats_sampling_df.sensor=='at1') & (~ats_sampling_df.updated_cloud_cover.notnull()))
mask = ats_sampling_df.updated_cloud_cover.notnull()
ats_sampling_df.cloud_cover.loc[mask] = ats_sampling_df.updated_cloud_cover.loc[mask]
ats_sampling_df.drop('updated_cloud_cover', axis=1, inplace=True)
return ats_sampling_df
In [325]:
ats_flare_sampling_df = update_at1_cloud_cover(ats_flare_sampling_df)
Checking the sampling data again, we can see that the AT1 cloud cover statistics are now more representative of those across the AT2 and ATS data. So the values at each individual AT1 flare will now be much more realistic.
In [326]:
# get the global annual sample counts and mean cloud cover
global_sampling = ats_flare_sampling_df.groupby(['year', 'sensor'], as_index=False).agg({'sample_counts_all': np.sum,
'cloud_cover': np.mean})
global_sampling
Out[326]:
In the case of SLSTR we do not have ATX samples available for all SLSTR hotspot sampling locations. So instead we locate the approximate nearest hotspot using a Euclidean distance metric and assign each SLSTR sampling grid cell the cloud cover from the ATX sampling dataframe. We do not need to be exact here as cloud cover regimes are consistent over hundreds of kilometres.
In [327]:
def update_slstr_cloud_cover(ats_sampling_df, sls_sampling_df):
# group ATS data to unique sample locations and get cc and loc
ats_sampling_mean_cc = ats_sampling_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'cloud_cover':np.mean})
ats_cloud_cover = ats_sampling_mean_cc.cloud_cover.values
ats_lats = ats_sampling_mean_cc.lats_arcmin.values
ats_lons = ats_sampling_mean_cc.lons_arcmin.values
# group sls data to unique sample locations
sls_sampling_mean_cc = sls_sampling_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'cloud_cover':np.mean})
sls_sampling_mean_cc.drop('cloud_cover', axis=1, inplace=True)
sls_lats = sls_sampling_mean_cc.lats_arcmin.values
sls_lons = sls_sampling_mean_cc.lons_arcmin.values
ats_lat_lon = np.dstack([np.deg2rad(ats_lats),
np.deg2rad(ats_lons)])[0]
ats_balltree = BallTree(ats_lat_lon, metric='euclidean')
# get the unique flare lats and lons for assessment in kdtree
sls_locations = np.array(zip(np.deg2rad(sls_lats),
np.deg2rad(sls_lons)))
# compare the flare locations to the potential locations in the orbit
distances, indexes = ats_balltree.query(sls_locations, k=1)
# get updated cc and append to df
updated_cc = ats_cloud_cover[indexes]
sls_sampling_mean_cc['cloud_cover'] = updated_cc
# merge updated grouped data onto sls dataframe
sls_sampling_df.drop('cloud_cover', axis=1, inplace=True)
sls_sampling_df = sls_sampling_df.merge(sls_sampling_mean_cc, on=['lats_arcmin', 'lons_arcmin'], how='outer')
return sls_sampling_df
In [328]:
sls_flare_sampling_df = update_slstr_cloud_cover(ats_flare_sampling_df, sls_flare_sampling_df.copy())
In [329]:
# get the global annual sample counts and mean cloud cover
global_sampling = sls_flare_sampling_df.groupby(['year', 'sensor'], as_index=False).agg({'sample_counts_all': np.sum,
'cloud_cover': np.mean})
global_sampling
Out[329]:
In [330]:
print 'n persistent ats:', ats_flare_sampling_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
gp = ats_flare_sampling_df.groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_all':np.sum})
print gp.sample_counts_all.mean()
print gp.sample_counts_all.std()
print 'n persistent sls:', sls_flare_sampling_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
gp = sls_flare_sampling_df.groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_all':np.sum})
print gp.sample_counts_all.mean()
print gp.sample_counts_all.std()
In [331]:
def create_header(sensor, fname, a=True):
if sensor in ['at1', 'at2', 'ats']:
if a:
text = ["Conventions,G,BADC-CSV,1,",
"title,G,ATSR-SLSTR Global Gas Flaring Activity Dataset,",
"creator,G,Daniel Fisher,King's College London,",
"activity,G,KCL Global Gas Flaring,",
"location,G,global,",
"null_value,G,-999,",
"last_revised_date,G,{0},".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
"comments,G,Data record of active thermal anomalies and associated fire radiative power,",
"comments,G,from the Along Track Scanning and Sea and Land Surface Temperature Radiometers,",
"long_name,lat_arcmin,latitude arcminute grid cell index for pixel,",
"long_name,lon_arcmin,longitude arcminute grid cell index for pixel,",
"long_name,bg_mwir_rad_3_7,mean background 3.7 um spectral radiance,W m^-2 sr^-1 um^-1,",
"long_name,pixel_area,thermal anomaly pixel area,m^2,",
"long_name,lat,thermal anomaly containing pixel latitude,degrees,",
"coordinate_variable,lat,y,",
"long_name,swir_rad_1_6,thermal anomaly containing pixel 1.6 um spectral radiance,W m^-2 sr^-1 um^-1,",
"long_name,frp,thermal anomaly containing pixel fire radiative power,W m^-2,",
"long_name,mwir_rad_3_7,thermal anomaly containing pixel 3.7 um spectral radiance,W m^-2 sr^-1 um^-1,",
"long_name,lon,thermal anomaly containing pixel longitude,degrees,",
"coordinate_variable,lon,x,",
"long_name,year,year of satellite overpass,",
"long_name,month,month of satellite overpass,",
"long_name,day,day of satellite overpass,",
"long_name,hhmm,hour and minute of satellite overpass,",
"long_name,sensor,satellite abbreviation,",
"data,",
]
else:
text = ["Conventions,G,BADC-CSV,1,",
"title,G,ATSR-SLSTR Global Gas Flaring Sampling Dataset,",
"creator,G,Daniel Fisher, King's College London,",
"activity,G,KCL Global Gas Flaring,",
"location,G,global,",
"null_value,G,-999,",
"last_revised_date,G,{0},".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
"comments,G,Data record of samples of grid cells known to contain thermal anomalies,",
"comments,G,and measures of cloud cover in proximity (±8 pixels) to the anomaly location,",
"comments,G,from the Along Track Scanning and Sea and Land Surface Temperature Radiometers.,",
"comments,G,Only for use in conjunction with the activity dataset.,",
"long_name,lat_arcmin,latitude arcminute grid cell index, ",
"coordinate_variable,lat_arcmin,y,",
"long_name,lon_arcmin,longitude arcminute grid cell index,",
"coordinate_variable,lon_arcmin,x,",
"long_name,cloud_cover,mean fractional cloud cover for grid cell for overpass,",
"long_name,year,year of satellite overpass,",
"long_name,month,month of satellite overpass,",
"long_name,day,day of satellite overpass,",
"long_name,hhmm,hour and minute of satellite overpass,",
"long_name,sensor,satellite abbreviation,",
"data,",
]
else:
if a:
text = ["Conventions,G,BADC-CSV,1,",
"title,G,ATSR-SLSTR Global Gas Flaring Activity Dataset,",
"creator,G,Daniel Fisher,King's College London,",
"activity,G,KCL Global Gas Flaring,",
"location,G,global,",
"null_value,G,-999,",
"last_revised_date,G,{0},".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
"comments,G,Data record of pixels with active thermal anomalies and associated fire radiative power,",
"comments,G,from the Along Track Scanning and Sea and Land Surface Temperature Radiometers,",
"long_name,lat_arcmin,latitude arcminute grid cell index for pixel,",
"long_name,lon_arcmin,longitude arcminute grid cell index for pixel,",
"long_name,swir_rad_2_2,thermal anomaly containing pixel 2.2 um spectral radiance,W m^-2 sr^-1 um^-1,",
"long_name,frp,thermal anomaly containing pixel fire radiative power,W m^-2,",
"long_name,lat,thermal anomaly containing pixel latitude,degrees,",
"coordinate_variable,lat,y,",
"long_name,swir_rad_1_6,thermal anomaly containing pixel 1.6 um spectral radiance,W m^-2 sr^-1 um^-1,",
"long_name,lon,thermal anomaly containing pixel longitude,degrees,",
"coordinate_variable,lon,x,",
"long_name,pixel_area,thermal anomaly pixel area,m^2,",
"long_name,year,year of satellite overpass,",
"long_name,month,month of satellite overpass,",
"long_name,day,day of satellite overpass,",
"long_name,hhmm,hour and minute of satellite overpass,",
"long_name,sensor,satellite abbreviation,",
"data,",
]
else:
text = ["Conventions,G,BADC-CSV,1,",
"title,G,ATSR-SLSTR Global Gas Flaring Sampling Dataset,",
"creator,G,Daniel Fisher, King's College London,",
"activity,G,KCL Global Gas Flaring,",
"location,G,global,",
"null_value,G,-999,",
"last_revised_date,G,{0},".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
"comments,G,Data record of samples of grid cells known to contain thermal anomalies,",
"comments,G,and measures of cloud cover in proximity (±8 pixels) to the anomaly location,",
"comments,G,from the Along Track Scanning and Sea and Land Surface Temperature Radiometers.,",
"comments,G,Only for use in conjunction with the activity dataset.,",
"long_name,lat_arcmin,latitude arcminute grid cell index,",
"coordinate_variable,lat_arcmin,y,",
"long_name,lon_arcmin,longitude arcminute grid cell index,",
"coordinate_variable,lon_arcmin,x,",
"long_name,cloud_cover,mean fractional cloud cover for grid cell for overpass,",
"long_name,year,year of satellite overpass,",
"long_name,month,month of satellite overpass,",
"long_name,day,day of satellite overpass,",
"long_name,hhmm,hour and minute of satellite overpass,",
"long_name,sensor,satellite abbreviation,",
"data,",
]
with open(os.path.join(output_path_ds, fname),'wb') as f:
for line in text:
f.write(line)
f.write('\n')
In [332]:
ats_flare_df.columns
Out[332]:
In [333]:
ats_flare_df_out = ats_flare_df.copy()
ats_flare_df_out.drop(ats_flare_df_out.columns[[0, 3, 4, 8, 9, -3, -2, -1]], axis=1, inplace=True)
ats_flare_df_out.head()
ats_flare_df_out.rename(columns={'lats_arcmin': 'lat_arcmin',
'lons_arcmin': 'lon_arcmin',
'bg_size_used': 'bg_win_size',
'inval_pixels_bg_pc': 'bg_invalid',
'lats': 'lat',
'lons': 'lon',
'hotspot_bg_pc': 'bg_hotspot',
'pixel_size': 'pixel_area',
'swir_radiances': 'swir_rad_1_6',
'mwir_radiances': 'mwir_rad_3_7',
'mwir_bg': 'bg_mwir_rad_3_7'}, inplace=True)
ats_flare_df_out.loc[ats_flare_df_out.bg_mwir_rad_3_7 == -1, 'bg_mwir_rad_3_7'] = -999
In [73]:
years = np.arange(1991, 2013)
months = np.arange(1,13)
for y in years:
y_f = str(y).zfill(2)
for m in months:
# subset data frame
ym_df = ats_flare_df_out[(ats_flare_df_out.month == m) & (ats_flare_df_out.year == y)]
if ym_df.empty:
continue
m_f = str(m).zfill(2)
# split by sensor
if 'at1' in ym_df.sensor.values:
# create csv
at1_fname = 'at1_global_{0}{1}01_ggf_activity_monthly_v1.csv'.format(y_f, m_f)
create_header('at1', at1_fname)
# append dataframe and footer to csv
at1_ym_df = ym_df[ym_df.sensor == 'at1']
with open(os.path.join(output_path_ds, at1_fname), 'a') as f:
at1_ym_df.to_csv(f, index=False)
f.write('end data,')
if 'at2' in ym_df.sensor.values:
# create csv
at2_fname = 'at2_global_{0}{1}01_ggf_activity_monthly_v1.csv'.format(y_f, m_f)
create_header('at2', at2_fname)
# append dataframe and footer to csv
at2_ym_df = ym_df[ym_df.sensor == 'at2']
with open(os.path.join(output_path_ds, at2_fname), 'a') as f:
at2_ym_df.to_csv(f, index=False)
f.write('end data,')
if 'ats' in ym_df.sensor.values:
# create csv
ats_fname = 'ats_global_{0}{1}01_ggf_activity_monthly_v1.csv'.format(y_f, m_f)
create_header('ats', ats_fname)
# append dataframe and footer to csv
ats_ym_df = ym_df[ym_df.sensor == 'ats']
with open(os.path.join(output_path_ds, ats_fname), 'a') as f:
ats_ym_df.to_csv(f, index=False)
f.write('end data,')
print 'done'
In [ ]:
ats_flare_sampling_df_out = ats_flare_sampling_df.copy()
ats_flare_sampling_df_out.drop(ats_flare_sampling_df_out.columns[[0, -1]], axis=1, inplace=True)
ats_flare_sampling_df_out.rename(columns={'lats_arcmin': 'lat_arcmin',
'lons_arcmin': 'lon_arcmin'}, inplace=True)
In [ ]:
years = np.arange(1991, 2013)
months = np.arange(1,13)
for y in years:
y_f = str(y).zfill(2)
for m in months:
# subset data frame
ym_df = ats_flare_sampling_df_out[(ats_flare_sampling_df_out.month == m) & (ats_flare_sampling_df_out.year == y)]
if ym_df.empty:
continue
m_f = str(m).zfill(2)
# split by sensor
if 'at1' in ym_df.sensor.values:
# create csv
at1_fname = 'at1_global_{0}{1}01_ggf_sampling_monthly_v1.csv'.format(y_f, m_f)
create_header('at1', at1_fname, a=False)
# append dataframe and footer to csv
at1_ym_df = ym_df[ym_df.sensor == 'at1']
with open(os.path.join(output_path_ds, at1_fname), 'a') as f:
at1_ym_df.to_csv(f, index=False)
f.write('end data,')
if 'at2' in ym_df.sensor.values:
# create csv
at2_fname = 'at2_global_{0}{1}01_ggf_sampling_monthly_v1.csv'.format(y_f, m_f)
create_header('at2', at2_fname, a=False)
# append dataframe and footer to csv
at2_ym_df = ym_df[ym_df.sensor == 'at2']
with open(os.path.join(output_path_ds, at2_fname), 'a') as f:
at2_ym_df.to_csv(f, index=False)
f.write('end data,')
if 'ats' in ym_df.sensor.values:
# create csv
ats_fname = 'ats_global_{0}{1}01_ggf_sampling_monthly_v1.csv'.format(y_f, m_f)
create_header('ats', ats_fname, a=False)
# append dataframe and footer to csv
ats_ym_df = ym_df[ym_df.sensor == 'ats']
with open(os.path.join(output_path_ds, ats_fname), 'a') as f:
ats_ym_df.to_csv(f, index=False)
f.write('end data,')
print 'done'
In [ ]:
sls_flare_df_out = sls_flare_df.copy()
sls_flare_df_out.drop(sls_flare_df_out.columns[[0, 6, 7, 9, -2, -1]], axis=1, inplace=True)
sls_flare_df_out.rename(columns={'lats_arcmin': 'lat_arcmin',
'lons_arcmin': 'lon_arcmin',
'lats': 'lat',
'lons': 'lon',
'pixel_size': 'pixel_area',
'swir_radiances': 'swir_rad_1_6',
'swir_radiances_22': 'swir_rad_2_2'}, inplace=True)
In [ ]:
years = np.arange(2017, 2019)
months = np.arange(1,13)
for y in years:
y_f = str(y).zfill(2)
for m in months:
# subset data frame
ym_df = sls_flare_df_out[(sls_flare_df_out.month == m) & (sls_flare_df_out.year == y)]
if ym_df.empty:
continue
m_f = str(m).zfill(2)
# split by sensor
if 'sls' in ym_df.sensor.values:
# create csv
sls_fname = 'sls_global_{0}{1}01_ggf_activity_monthly_v1.csv'.format(y_f, m_f)
create_header('sls', sls_fname)
# append dataframe and footer to csv
sls_ym_df = ym_df[ym_df.sensor == 'sls']
with open(os.path.join(output_path_ds, sls_fname), 'a') as f:
sls_ym_df.to_csv(f, index=False)
f.write('end data,')
In [ ]:
sls_flare_sampling_df_out = sls_flare_sampling_df.copy()
sls_flare_sampling_df_out.drop(sls_flare_sampling_df_out.columns[[0, -2]], axis=1, inplace=True)
sls_flare_sampling_df_out.rename(columns={'lats_arcmin': 'lat_arcmin',
'lons_arcmin': 'lon_arcmin',
}, inplace=True)
In [ ]:
years = np.arange(2017, 2019)
months = np.arange(1,13)
for y in years:
y_f = str(y).zfill(2)
for m in months:
# subset data frame
ym_df = sls_flare_sampling_df_out[(sls_flare_sampling_df_out.month == m) & (sls_flare_sampling_df_out.year == y)]
if ym_df.empty:
continue
m_f = str(m).zfill(2)
# split by sensor
if 'sls' in ym_df.sensor.values:
# create csv
sls_fname = 'sls_global_{0}{1}01_ggf_sampling_monthly_v1.csv'.format(y_f, m_f)
create_header('sls', sls_fname, a=False)
# append dataframe and footer to csv
sls_ym_df = ym_df[ym_df.sensor == 'sls']
with open(os.path.join(output_path_ds, sls_fname), 'a') as f:
sls_ym_df.to_csv(f, index=False)
f.write('end data,')
In [610]:
ats_hotspot_sampling_df.groupby(['year', 'month', 'day']).hhmm.nunique().reset_index()
Out[610]:
In [612]:
#ats_monthly_counts = ats_hotspot_sampling_df.groupby(['year', 'month', 'day'], as_index=False).agg({'sample_counts_all': np.sum})
ats_monthly_counts = ats_hotspot_sampling_df.groupby(['year', 'month', 'day']).hhmm.nunique().reset_index()
ats_monthly_counts['dt'] = pd.to_datetime(ats_monthly_counts.year*10000+
ats_monthly_counts.month*100+
ats_monthly_counts.day,format='%Y%m%d')
#sls_monthly_counts = sls_hotspot_sampling_df.groupby(['year', 'month', 'day'], as_index=False).agg({'sample_counts_all': np.sum})
sls_monthly_counts = sls_hotspot_sampling_df.groupby(['year', 'month', 'day']).hhmm.nunique().reset_index()
sls_monthly_counts['dt'] = pd.to_datetime(sls_monthly_counts.year*10000+
sls_monthly_counts.month*100+
sls_monthly_counts.day,format='%Y%m%d')
plt.figure(figsize=(20,5))
plt.plot(ats_monthly_counts.dt, ats_monthly_counts.hhmm, 'r.')
#plt.plot(sls_monthly_counts.dt, sls_monthly_counts.sample_counts_all, 'b.')
plt.show()
# plt.close('all')
# #plt.figure(figsize=(30,10))
# # Initialize the FacetGrid object
# pal = sns.cubehelix_palette(1, rot=-.25, light=.7)
# g = sns.FacetGrid(ats_monthly_counts, row="Year", hue="Year", aspect=15, height=0.8, palette=pal)
# g.map(sns.barplot, "Month", "sample_counts_all")
# #g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
# #g.map(plt.axhline, y=0, lw=2, clip_on=False)
# # Define and use a simple function to label the plot in axes coordinates
# def label(x, color, label):
# ax = plt.gca()
# ax.text(0, .2, label, fontweight="bold", color='k',
# ha="left", va="center", transform=ax.transAxes)
# g.map(label, "Month")
# g.set_titles("")
# plt.savefig(os.path.join(output_path, 'monthly_sample_counts_ats.png'), dpi=600)
# plt.show()
In [74]:
ats_monthly_counts = ats_flare_df.groupby(['year', 'month'], as_index=False).agg({'sample_counts_flare': np.sum})
ats_monthly_counts['yearmonth'] = ats_monthly_counts.year*100 + ats_monthly_counts.month
ats_monthly_counts.set_index('yearmonth', inplace=True)
ats_monthly_counts.drop(['year', 'month'], axis=1, inplace=True)
In [75]:
year = (np.arange(1991,2013,1)*100).repeat(12)
month = np.tile(np.arange(1,13,1), 2013-1991)
idx = year + month
In [76]:
ats_monthly_counts = ats_monthly_counts.reindex(idx, fill_value=0)
ats_monthly_counts['Year'] = year / 100
ats_monthly_counts['Month'] = month
In [557]:
plt.close('all')
#plt.figure(figsize=(30,10))
# Initialize the FacetGrid object
pal = sns.cubehelix_palette(1, rot=-.25, light=.7)
g = sns.FacetGrid(ats_monthly_counts, row="Year", hue="Year", aspect=15, height=0.8, palette=pal)
g.map(sns.barplot, "Month", "sample_counts_flare")
#g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
#g.map(plt.axhline, y=0, lw=2, clip_on=False)
# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
ax = plt.gca()
ax.text(0, .2, label, fontweight="bold", color='k',
ha="left", va="center", transform=ax.transAxes)
g.map(label, "Month")
g.set_titles("")
plt.savefig(os.path.join(output_path, 'monthly_flare_counts_ats.png'), dpi=600)
plt.show()
In [78]:
## ATS Flare persistency
In [79]:
args = ['lats_arcmin', 'lons_arcmin', 'year', 'month', 'day']
ats_flare_locations = ats_flare_df.drop_duplicates(args)[args]
ats_flare_locations['min_time'] = pd.to_datetime((ats_flare_locations.year*10000+ats_flare_locations.month*100+ats_flare_locations.day).apply(str),format='%Y%m%d')
ats_flare_locations['max_time'] = pd.to_datetime((ats_flare_locations.year*10000+ats_flare_locations.month*100+ats_flare_locations.day).apply(str),format='%Y%m%d')
ats_flare_locations = ats_flare_locations.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'min_time':np.min, 'max_time': np.max})
t_dff = ats_flare_locations.max_time - ats_flare_locations.min_time
In [80]:
t_dff.describe()
Out[80]:
First split the VIIRS hotspots into flaring and non-flaring hotspots:
In [81]:
viirs_industrial_df['is_flare'] = viirs_industrial_df.name_id == 4
viirs_industrial_df['sample_counts_flare'] = 10
viirs_industrial_df = dist_to_nearest_volcano(volc_df, viirs_industrial_df)
In [897]:
plot_data_screening_v3(ats_ratio_df, sls_ratio_df, viirs_industrial_df)
In [86]:
def within_dist(target_df, observed_df):
'''
The target dataframe contains the assumed TRUE flares. We compare all flares in
the observed dataframe to the flares in the target df, and if an observed flare
is within approximately 2 km (one arcminute grid cell, including diagonals) of any
target flare then it is flagged as a match (i.e. it observes a flare in the same location as the target df).
'''
target_lat_lon = np.dstack([target_df.lats_arcmin.values, target_df.lons_arcmin.values])[0]
target_balltree = BallTree(target_lat_lon, metric='euclidean')
# get the unique flare lats and lons for assessment in kdtree
observed_locations = np.array(zip(observed_df.lats_arcmin.values, observed_df.lons_arcmin.values))
# compare the flare locations to the potential locations in the orbit
distances, indexes = target_balltree.query(observed_locations, k=1)
# set up the dataframe to hold the distances
mask = distances < np.sqrt(2) # if within 1 grid cell (sqrt 2 includes corners)
observed_df['within_arcmin'] = mask
return observed_df
In [87]:
agg_dict = {'name_id': lambda x:stats.mode(x)[0][0],
'lats': np.mean,
'lons': np.mean}
viirs_unique_flares = viirs_industrial_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg(agg_dict)
viirs_unique_flares = viirs_unique_flares[viirs_unique_flares.name_id == 4]
viirs_unique_flares.drop('name_id', axis=1, inplace=True)
ats_unique_flares = ats_flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin'])[['lats_arcmin', 'lons_arcmin', 'lats','lons']]
sls_unique_flares = sls_flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin'])[['lats_arcmin', 'lons_arcmin', 'lats','lons']]
In [88]:
# adding country level data
ats_unique_flares = ats_unique_flares.merge(ats_countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
sls_unique_flares = sls_unique_flares.merge(sls_countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
viirs_unique_flares = viirs_unique_flares.merge(viirs_countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
In [89]:
# within_dist(target_df, observed_df)
viirs_unique_flares_ats = within_dist(ats_unique_flares.copy(), viirs_unique_flares.copy())
viirs_unique_flares_sls = within_dist(sls_unique_flares.copy(), viirs_unique_flares.copy())
sls_unique_flares_viirs = within_dist(viirs_unique_flares.copy(), sls_unique_flares.copy())
sls_unique_flares_ats = within_dist(ats_unique_flares.copy(), sls_unique_flares.copy())
ats_unique_flares_viirs = within_dist(viirs_unique_flares.copy(), ats_unique_flares.copy())
ats_unique_flares_sls = within_dist(sls_unique_flares.copy(), ats_unique_flares.copy())
In [90]:
print 'ats matched / viirs observed:', np.sum(viirs_unique_flares_ats.within_arcmin) * 1.0/ viirs_unique_flares_ats.shape[0]* 100,'%', np.sum(viirs_unique_flares_ats.within_arcmin), viirs_unique_flares_ats.shape[0]
print 'sls matched / viirs observed:', np.sum(viirs_unique_flares_sls.within_arcmin) * 1.0/ viirs_unique_flares_sls.shape[0]* 100,'%', np.sum(viirs_unique_flares_sls.within_arcmin), viirs_unique_flares_sls.shape[0]
print 'viirs matched / ats observed:', np.sum(ats_unique_flares_viirs.within_arcmin) * 1.0/ ats_unique_flares_viirs.shape[0]* 100,'%', np.sum(ats_unique_flares_viirs.within_arcmin), ats_unique_flares_viirs.shape[0]
print 'sls matched / ats observed:', np.sum(ats_unique_flares_sls.within_arcmin) * 1.0/ ats_unique_flares_sls.shape[0]* 100,'%', np.sum(ats_unique_flares_sls.within_arcmin), ats_unique_flares_sls.shape[0]
print 'viirs matched / sls observed:', np.sum(sls_unique_flares_viirs.within_arcmin) * 1.0/ sls_unique_flares_viirs.shape[0]* 100,'%', np.sum(sls_unique_flares_viirs.within_arcmin), sls_unique_flares_viirs.shape[0]
print 'ats matched / sls observed:', np.sum(sls_unique_flares_ats.within_arcmin) * 1.0/ sls_unique_flares_ats.shape[0]* 100,'%', np.sum(sls_unique_flares_ats.within_arcmin), sls_unique_flares_ats.shape[0]
In [91]:
# get top countries according to VIIRS
viirs_top_20 = viirs_unique_flares.groupby('countries').countries.count().sort_values(ascending=False)[0:20]
top_20_countries = viirs_top_20.index.values
top_20_countries.sort()
print 'VIIRS top 20 covers:', viirs_top_20.sum() * 1.0 / viirs_unique_flares.shape[0] * 100, '% of flares'
In [92]:
viirs_unique_flares_ats_t20 = viirs_unique_flares_ats[viirs_unique_flares_ats['countries'].isin(top_20_countries)]
viirs_unique_flares_sls_t20 = viirs_unique_flares_sls[viirs_unique_flares_sls['countries'].isin(top_20_countries)]
sls_unique_flares_viirs_t20 = sls_unique_flares_viirs[sls_unique_flares_viirs['countries'].isin(top_20_countries)]
sls_unique_flares_ats_t20 = sls_unique_flares_ats[sls_unique_flares_ats['countries'].isin(top_20_countries)]
ats_unique_flares_viirs_t20 = ats_unique_flares_viirs[ats_unique_flares_viirs['countries'].isin(top_20_countries)]
ats_unique_flares_sls_t20 = ats_unique_flares_sls[ats_unique_flares_sls['countries'].isin(top_20_countries)]
In [93]:
viirs_ats_ratio = []
viirs_sls_ratio = []
sls_viirs_ratio = []
sls_ats_ratio = []
ats_viirs_ratio = []
ats_sls_ratio = []
for c in top_20_countries:
c_df = viirs_unique_flares_ats_t20[viirs_unique_flares_ats_t20.countries == c]
viirs_ats_ratio.append(c_df.within_arcmin.sum() * 1.0 / c_df.shape[0]* 100)
c_df = viirs_unique_flares_sls_t20[viirs_unique_flares_sls_t20.countries == c]
viirs_sls_ratio.append(c_df.within_arcmin.sum() * 1.0 / c_df.shape[0]* 100)
c_df = sls_unique_flares_viirs_t20[sls_unique_flares_viirs_t20.countries == c]
sls_viirs_ratio.append(c_df.within_arcmin.sum() * 1.0 / c_df.shape[0]* 100)
c_df = sls_unique_flares_ats_t20[sls_unique_flares_ats_t20.countries == c]
sls_ats_ratio.append(c_df.within_arcmin.sum() * 1.0 / c_df.shape[0] * 100)
c_df = ats_unique_flares_viirs_t20[ats_unique_flares_viirs_t20.countries == c]
ats_viirs_ratio.append(c_df.within_arcmin.sum() * 1.0 / c_df.shape[0]* 100)
c_df = ats_unique_flares_sls_t20[ats_unique_flares_sls_t20.countries == c]
ats_sls_ratio.append(c_df.within_arcmin.sum() * 1.0 / c_df.shape[0]* 100)
In [94]:
for i, c in enumerate(top_20_countries):
print c, viirs_ats_ratio[i], viirs_sls_ratio[i], sls_ats_ratio[i], sls_viirs_ratio[i], ats_sls_ratio[i], ats_viirs_ratio[i]
In [95]:
# do the plot
plt.close('all')
fig = plt.figure(figsize=(14, 7))
ax0 = plt.subplot(131)
ax1 = plt.subplot(132)
ax2 = plt.subplot(133)
ind = np.arange(0, 20)
ax0.barh(ind, viirs_sls_ratio, height=0.4, align='edge', color='gray', label = 'S/V')
ax0.barh(ind-0.4, viirs_ats_ratio, height=0.4, align='edge', color='r', label='A/V')
ax0.set_yticks(ind)
ax0.set_xlim([0,100])
ax0.set_ylim([-1,20.5])
ax0.plot([50, 50], [-10, 30], 'k--')
ax0.set_yticklabels(top_20_countries)
ax0.legend()
ax0.set(ylabel='Top 20 Countries', xlabel='Percent Matched')
ax0.yaxis.label.set_size(12)
ax0.xaxis.label.set_size(12)
ax1.barh(ind, sls_viirs_ratio, height=0.4, align='edge', color='gray', label='V/S')
ax1.barh(ind-0.4, sls_ats_ratio, height=0.4, align='edge', color='b', label='A/S')
ax1.set_yticks(ind)
ax1.set_xlim([0,100])
ax1.set_ylim([-1,20.5])
ax1.plot([50, 50], [-10, 30], 'k--')
ax1.set_yticklabels('')
ax1.legend()
ax1.set(xlabel='Percent Matched')
ax1.xaxis.label.set_size(12)
ax2.barh(ind, ats_viirs_ratio, height=0.4, align='edge', color='gray', label='V/A')
ax2.barh(ind-0.4, ats_sls_ratio, height=0.4, align='edge', color='y', label='S/A')
ax2.set_yticks(ind)
ax2.set_xlim([0,100])
ax2.set_ylim([-1,20.5])
ax2.plot([50, 50], [-10, 30], 'k--')
ax2.set_yticklabels('')
ax2.legend()
ax2.set(xlabel='Percent Matched')
ax2.xaxis.label.set_size(12)
ax0.text(0.85, 0.02, '(a)', transform=ax0.transAxes, fontsize=18)
ax1.text(0.85, 0.02, '(b)', transform=ax1.transAxes, fontsize=18)
ax2.text(0.85, 0.02, '(c)', transform=ax2.transAxes, fontsize=18)
plt.savefig(os.path.join(output_path, 'detection_bar_charts.png'), bbox_inches='tight', dpi=600)
plt.show()
From the above, ATSR sees a large fraction of the VIIRS and SLSTR flares. The problem area appears to be the United States. Why?
We have the December 2017 VIIRS data. Assume that over the United States in December there are few wildfires. We restrict to only those points that are >1400 K, to be consistent with the thresholds applied to VIIRS. The SLSTR data is screened by requiring at least two detections and a temperature threshold that places it between 1400 and 2800 K. This gives an assumed flare data subset over North America.
This can be visualised with two cumulative histograms and the individual statistics reported.
Ideas for the observed behaviour:
Flares could be operating more intermittently, so with our less frequent observations we are missing much of the flaring activity. This can be checked by processing all of the SLSTR data and evaluating off swath, or by processing all of the VIIRS data and evaluating only at the same view angles.
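A minimal sketch of the screening described above, assuming the nightfire_df and sls_hotspot_df dataframes and the Temp_BB / sample_counts_flare columns used elsewhere in this notebook (the SLSTR temperature screening itself is applied upstream and is not shown here):
In [ ]:
# sketch only: VIIRS Nightfire points above the 1400 K blackbody temperature threshold
viirs_assumed_flares = nightfire_df[nightfire_df.Temp_BB > 1400]
# sketch only: SLSTR grid cells with at least two detections over the period
sls_counts = sls_hotspot_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'sample_counts_flare': np.sum})
sls_repeat_cells = sls_counts[sls_counts.sample_counts_flare >= 2]
sls_assumed_flares = sls_hotspot_df.merge(sls_repeat_cells[['lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])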
In [96]:
# Get SLSTR first (using annum data)
sls_df_subset = sls_flare_df_annum[['frp', 'year', 'month', 'lats_arcmin', 'lons_arcmin']]
sls_df_subset = sls_df_subset.merge(sls_countries_df[['countries', 'lats_arcmin', 'lons_arcmin']],
on=['lats_arcmin', 'lons_arcmin'])
sls_df_subset = sls_df_subset[((sls_df_subset.countries == 'United States') |
(sls_df_subset.countries == 'Canada')) &
(sls_df_subset.year == 2017) &
(sls_df_subset.month == 12)
]
sls_df_grouped = sls_df_subset.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'frp': np.mean})
# get Nightfire DS and add VZA data
nightfire_df_subset = nightfire_df[['RH', 'Temp_BB', 'lats_arcmin', 'lons_arcmin', 'Sample_M10']]
nightfire_df_subset = nightfire_df_subset.merge(nightfire_countries_df[['countries', 'lats_arcmin', 'lons_arcmin']],
on=['lats_arcmin', 'lons_arcmin'])
vza = viirs_zenith_angles[nightfire_df_subset.Sample_M10.values.astype(int)-1].astype('float')
nightfire_df_subset['vza'] = vza
# all viirs flare data
nightfire_df_subset_all = nightfire_df_subset[(nightfire_df_subset.RH != 999999) &
(nightfire_df_subset.Temp_BB > 1600) &
((nightfire_df_subset.countries == 'United States') |
(nightfire_df_subset.countries == 'Canada'))]
nightfire_df_grouped_all = nightfire_df_subset_all.groupby(['lats_arcmin', 'lons_arcmin'],
as_index=False).agg({'RH': np.mean})
# all VIIRS in VZA
nightfire_df_subset_vza = nightfire_df_subset_all[nightfire_df_subset_all.vza < 22]
nightfire_df_grouped_vza = nightfire_df_subset_vza.groupby(['lats_arcmin', 'lons_arcmin'],
as_index=False).agg({'RH': np.mean})
# # all VIIRS > min SLS FRP value
nightfire_df_subset_min = nightfire_df_subset_vza[nightfire_df_subset_vza.RH > sls_df_grouped.frp.min()]
nightfire_df_grouped_min = nightfire_df_subset_min.groupby(['lats_arcmin', 'lons_arcmin'],
as_index=False).agg({'RH': np.mean})
#
plt.close('all')
fig = plt.figure(figsize=(7, 7))
ax0 = plt.subplot(111)
ax0.set_xlim([0,18])
ax0.set_xlabel('Binned FRP (MW)', fontsize=18)
ax0.set_ylabel('Frequency', fontsize=18)
ax0.hist(nightfire_df_grouped_all.RH, bins=50, histtype='step', cumulative=True, color='gray', label='All VIIRS')
ax0.hist(nightfire_df_grouped_vza.RH, bins=50, histtype='bar', cumulative=True, color='gray', label='VIIRS (VZA<22)')
ax0.hist(nightfire_df_grouped_min.RH, bins=50, histtype='bar', cumulative=True, color='r', label='VIIRS (>min(SLSTR))')
ax0.hist(sls_df_grouped.frp, bins=50, histtype='bar', cumulative=True, color='k', label='SLSTR (VZA<22)')
plt.legend(loc=1)
plt.savefig(os.path.join(output_path, 'NA_flare_counts_viirs_vs_slstr.png'), bbox_inches='tight', dpi=600)
plt.show()
In [788]:
nightfire_df_subset[nightfire_df_subset.RH < sls_df_grouped.frp.min()].RH.describe()
Out[788]:
In [97]:
c = ['United States', 'Canada', 'China']
m = ~viirs_unique_flares_ats['countries'].isin(c)
print 'ats matched / viirs observed:', np.sum(viirs_unique_flares_ats.loc[m].within_arcmin) * 1.0/ np.sum(m) * 100,'%', np.sum(viirs_unique_flares_ats.loc[m].within_arcmin), np.sum(m)
m = ~viirs_unique_flares_sls['countries'].isin(c)
print 'sls matched / viirs observed:', np.sum(viirs_unique_flares_sls.loc[m].within_arcmin) * 1.0/ np.sum(m)* 100,'%', np.sum(viirs_unique_flares_sls.loc[m].within_arcmin), np.sum(m)
m = ~ats_unique_flares_viirs['countries'].isin(c)
print 'viirs matched / ats observed:', np.sum(ats_unique_flares_viirs.loc[m].within_arcmin) * 1.0/ np.sum(m)* 100,'%', np.sum(ats_unique_flares_viirs.loc[m].within_arcmin), np.sum(m)
m = ~ats_unique_flares_sls['countries'].isin(c)
print 'sls matched / ats observed:', np.sum(ats_unique_flares_sls.loc[m].within_arcmin) * 1.0/ np.sum(m)* 100,'%', np.sum(ats_unique_flares_sls.loc[m].within_arcmin), np.sum(m)
m = ~sls_unique_flares_viirs['countries'].isin(c)
print 'viirs matched / sls observed:', np.sum(sls_unique_flares_viirs.loc[m].within_arcmin) * 1.0/ np.sum(m)* 100,'%', np.sum(sls_unique_flares_viirs.loc[m].within_arcmin), np.sum(m)
m = ~sls_unique_flares_ats['countries'].isin(c)
print 'ats matched / sls observed:', np.sum(sls_unique_flares_ats.loc[m].within_arcmin) * 1.0/ np.sum(m)* 100,'%', np.sum(sls_unique_flares_ats.loc[m].within_arcmin), np.sum(m)
In this analysis we do not use the cloud-adjusted FRP values, as we are not worrying about sampling differences; we just assume that the cloud characteristics are similar for each sensor (which may be wrong, as we are looking at different times of day) and take the mean of the observations. We could get all the night-time VIIRS observations to do this, but we think it is unnecessary. This uses data from March.
In [98]:
# make sure that the march SLSTR data is restricted to flares only
sls_0317_flares = sls_hotspot_df_0317.copy()
sls_flare_locations = sls_flare_sampling_df_annum[['lats_arcmin', 'lons_arcmin']].copy()
sls_flare_locations.drop_duplicates(inplace=True)
# restrict to flaring sites only
print sls_0317_flares.shape
sls_0317_flares = pd.merge(sls_0317_flares, sls_flare_locations, on=['lats_arcmin', 'lons_arcmin'])
print sls_0317_flares.shape
In [99]:
def round_to_nearest_01_degree(s):
return (s.round(1) * 10).astype(int)
In [544]:
viirs_persistent = viirs_iraq_hotspot_df.copy()
viirs_persistent['lats_01'] = round_to_nearest_01_degree(viirs_persistent.lats)
viirs_persistent['lons_01'] = round_to_nearest_01_degree(viirs_persistent.lons)
to_group = ['year', 'lats_01', 'lons_01']
to_agg = {'frp': np.mean,
'lats': np.mean,
'lons': np.mean}
viirs_persistent = viirs_persistent.groupby(to_group, as_index=False).agg(to_agg)
viirs_persistent['years_seen'] = 1
to_group = ['lats_01', 'lons_01']
to_agg = {'years_seen': np.sum}
viirs_persistent = viirs_persistent.groupby(to_group, as_index=False).agg(to_agg)
# find locations seen in both 2012 and 2017 data
viirs_persistent = viirs_persistent[viirs_persistent.years_seen == 2][['lats_01', 'lons_01']]
print 'total persistent viirs sites seen in 2012 and 2017:', viirs_persistent.shape[0]
In [545]:
# merge on the persistent sites to reduce the viirs data
viirs_sites = viirs_iraq_hotspot_df.copy()
viirs_sites['lats_01'] = round_to_nearest_01_degree(viirs_sites.lats)
viirs_sites['lons_01'] = round_to_nearest_01_degree(viirs_sites.lons)
viirs_sites = pd.merge(viirs_sites, viirs_persistent, on=['lats_01', 'lons_01'])
# load in comparison sites
ats_sites = ats_flare_df[(ats_flare_df.year == 2012) & (ats_flare_df.month == 3)].copy()
sls_sites = sls_hotspot_df_0317.copy()
ats_sites['lats_01'] = round_to_nearest_01_degree(ats_sites.lats)
ats_sites['lons_01'] = round_to_nearest_01_degree(ats_sites.lons)
sls_sites['lats_01'] = round_to_nearest_01_degree(sls_sites.lats)
sls_sites['lons_01'] = round_to_nearest_01_degree(sls_sites.lons)
In [546]:
# now group the clusters by time stamp and sum frp for each time stamp
to_group = ['year', 'month', 'day', 'sensor', 'lats_01', 'lons_01']
to_agg = {'lats': np.mean, 'lons':np.mean, 'frp': np.sum}
viirs_sites_grouped = viirs_sites.groupby(to_group, as_index=False).agg(to_agg)
ats_sites_grouped = ats_sites.groupby(to_group, as_index=False).agg(to_agg)
sls_sites_grouped = sls_sites.groupby(to_group, as_index=False).agg(to_agg)
# now get the typical behaviour for each cluster over the year and month
to_group = ['year', 'month', 'sensor', 'lats_01', 'lons_01']
to_agg = {'lats': np.mean, 'lons':np.mean, 'frp': np.mean, 'frp_std': np.std}
viirs_sites_grouped['frp_std'] = viirs_sites_grouped.frp.copy()
ats_sites_grouped['frp_std'] = ats_sites_grouped.frp.copy()
sls_sites_grouped['frp_std'] = sls_sites_grouped.frp.copy()
viirs_sites_means = viirs_sites_grouped.groupby(to_group, as_index=False).agg(to_agg)
ats_sites_means = ats_sites_grouped.groupby(to_group, as_index=False).agg(to_agg)
sls_sites_means = sls_sites_grouped.groupby(to_group, as_index=False).agg(to_agg)
In [547]:
# now merge on year and cluster
viirs_sites_means.rename(columns={'frp': 'frp_viirs', 'frp_std': 'frp_std_viirs'}, inplace=True)
ats_sites_means.rename(columns={'frp': 'frp_ats', 'frp_std': 'frp_std_ats'}, inplace=True)
sls_sites_means.rename(columns={'frp': 'frp_sls', 'frp_std': 'frp_std_sls'}, inplace=True)
on = ['year', 'lats_01', 'lons_01']
cols_ats = ['year', 'lats_01', 'lons_01', 'frp_ats', 'frp_std_ats']
cols_sls = ['year', 'lats_01', 'lons_01', 'frp_sls', 'frp_std_sls']
cols_viirs = ['year', 'lats_01', 'lons_01', 'frp_viirs', 'frp_std_viirs', 'lats', 'lons']
ats_vs_viirs = pd.merge(viirs_sites_means[cols_viirs], ats_sites_means[cols_ats], on=on)
sls_vs_viirs = pd.merge(viirs_sites_means[cols_viirs], sls_sites_means[cols_sls], on=on)
ats_vs_sls = pd.merge(ats_sites_means[cols_ats], sls_sites_means[cols_sls], on=['lats_01', 'lons_01'])
In [548]:
# mask
#ats_vs_viirs = ats_vs_viirs[(ats_vs_viirs.frp_ats < 150) & (ats_vs_viirs.frp_viirs < 150)]
#sls_vs_viirs = sls_vs_viirs[(sls_vs_viirs.frp_sls < 150) & (sls_vs_viirs.frp_viirs < 150)]
max_lat = 36
min_lat = 20
min_lon = 42
max_lon = 60
ats_vs_viirs = ats_vs_viirs[(ats_vs_viirs.lats < max_lat) &
(ats_vs_viirs.lats > min_lat) &
(ats_vs_viirs.lons > min_lon) &
(ats_vs_viirs.lons < max_lon)]
sls_vs_viirs = sls_vs_viirs[(sls_vs_viirs.lats < max_lat) &
(sls_vs_viirs.lats > min_lat) &
(sls_vs_viirs.lons > min_lon) &
(sls_vs_viirs.lons < max_lon)]
In [768]:
def rmse(predictions, targets):
return np.sqrt(((predictions - targets) ** 2).mean())
# plot a scale bar with 4 subdivisions on the left side of the map
def scale_bar_left(ax, bars=1, length=None, location=(0.1, 0.05), linewidth=3, col='black'):
"""
ax is the axes to draw the scalebar on.
bars is the number of subdivisions of the bar (black and white chunks)
length is the length of the scalebar in km.
location is left side of the scalebar in axis coordinates.
(ie. 0 is the left side of the plot)
linewidth is the thickness of the scalebar.
color is the color of the scale bar
"""
# Get the limits of the axis in lat long
llx0, llx1, lly0, lly1 = ax.get_extent(ccrs.PlateCarree())
# Make tmc aligned to the left of the map,
# vertically at scale bar location
sbllx = llx0 + (llx1 - llx0) * location[0]
sblly = lly0 + (lly1 - lly0) * location[1]
tmc = ccrs.TransverseMercator(sbllx, sblly)
# Get the extent of the plotted area in coordinates in metres
x0, x1, y0, y1 = ax.get_extent(tmc)
# Turn the specified scalebar location into coordinates in metres
sbx = x0 + (x1 - x0) * location[0]
sby = y0 + (y1 - y0) * location[1]
# Calculate a scale bar length if none has been given
# (There's probably a more pythonic way of rounding the number but this works)
if not length:
length = (x1 - x0) / 5000 # in km
ndim = int(np.floor(np.log10(length))) # number of digits in number
length = round(length, -ndim) # round to 1sf
# Reduce the length until its leading digit is 1, 2 or 5
def scale_number(x):
if str(x)[0] in ['1', '2', '5']:
return int(x)
else:
return scale_number(x - 10 ** ndim)
length = scale_number(length)
# Generate the x coordinate for the ends of the scalebar
bar_xs = [sbx, sbx + length * 1000 / bars]
# Plot the scalebar chunks
barcol = 'black'
for i in range(0, bars):
# plot the chunk
ax.plot(bar_xs, [sby, sby], transform=tmc, color=barcol, linewidth=linewidth)
# alternate the colour
if barcol == 'white':
barcol = 'dimgrey'
else:
barcol = 'white'
# Generate the x coordinate for the number
bar_xt = sbx + i * length * 1000 / bars
# Plot the scalebar label for that chunk
# ax.text(bar_xt, sby, str(int(i * length / bars)), transform=tmc,
# horizontalalignment='center', verticalalignment='bottom',
# color=col)
# work out the position of the next chunk of the bar
bar_xs[0] = bar_xs[1]
bar_xs[1] = bar_xs[1] + length * 1000 / bars
# Generate the x coordinate for the last number
bar_xt = sbx + length * 1000
# Plot the last scalebar label
ax.text(bar_xt/2., sby, str(int(length))+ ' km', transform=tmc,
horizontalalignment='center', verticalalignment='bottom',
color=col)
# Plot the unit label below the bar
bar_xt = sbx + length * 1000 / 2
bar_yt = y0 + (y1 - y0) * (location[1] / 4)
ax.text(bar_xt, bar_yt, 'km', transform=tmc, horizontalalignment='center',
verticalalignment='bottom', color=col)
plt.close('all')
fig = plt.figure(figsize=(18, 5))
ax0 = plt.subplot(131)
ax1 = plt.subplot(132)
ax2 = plt.subplot(133, projection=ccrs.PlateCarree())
h = ax0.hexbin(ats_vs_viirs.frp_viirs, ats_vs_viirs.frp_ats, gridsize=45, mincnt=1,
cmap='Blues', linewidths=0.5, edgecolors='k',extent=[-5, 200, -5, 200])
cbar = plt.colorbar(mappable=h, ax=ax0).set_label('Sample Count', fontsize=15)
h = ax1.hexbin(sls_vs_viirs.frp_viirs, sls_vs_viirs.frp_sls, gridsize=45, mincnt=1,
cmap='Reds', linewidths=0.5, edgecolors='k', extent=[-5, 200, -5, 200])
cbar = plt.colorbar(mappable=h, ax=ax1).set_label('Sample Count', fontsize=15)
# sns.regplot(ats_vs_viirs.frp_viirs, ats_vs_viirs.frp_ats, ci=95, color='k', ax=ax0, scatter=False)
# sns.regplot(sls_vs_viirs.frp_viirs, sls_vs_viirs.frp_sls, ci=95, color='k', ax=ax1, scatter=False)
ax0.text(0.90, 0.02, '(a)', transform=ax0.transAxes, fontsize=14)
ax1.text(0.90, 0.02, '(b)', transform=ax1.transAxes, fontsize=14)
ax2.text(0.93, 0.02, '(c)', transform=ax2.transAxes, fontsize=14)
x_lim = (-5,200)
y_lim = (-5,200)
ax0.plot(x_lim, y_lim, color='k', linestyle='-', linewidth=1)
ax1.plot(x_lim, y_lim, color='k', linestyle='-', linewidth=1)
# ax0.set_xlim(x_lim)
# ax1.set_xlim(x_lim)
# ax0.set_ylim(y_lim)
# ax1.set_ylim(y_lim)
ax0.set(ylabel='AATSR Mean Cluster FRP (MW)', xlabel='VIIRS Mean Cluster FRP (MW)')
ax0.yaxis.label.set_size(12)
ax0.xaxis.label.set_size(12)
ax1.set(ylabel='SLSTR Mean Cluster FRP (MW)', xlabel='VIIRS Mean Cluster FRP (MW)')
ax1.yaxis.label.set_size(12)
ax1.xaxis.label.set_size(12)
slope0, intercept0, r_value0, _, _ = scipy.stats.linregress(ats_vs_viirs.frp_viirs,
ats_vs_viirs.frp_ats)
slope1, intercept1, r_value1, _, _ = scipy.stats.linregress(sls_vs_viirs.frp_viirs,
sls_vs_viirs.frp_sls)
# zero intercept reg
x = sls_vs_viirs.frp_viirs.values[:,np.newaxis]
y = sls_vs_viirs.frp_sls.values
a, _, _, _ = np.linalg.lstsq(x, y)
ats_median = np.median(ats_vs_viirs.frp_ats - ats_vs_viirs.frp_viirs)
sls_median = np.median(sls_vs_viirs.frp_sls - sls_vs_viirs.frp_viirs)
print 'ats median:', ats_median
print 'sls median:', sls_median
ats_mean = np.mean(ats_vs_viirs.frp_ats - ats_vs_viirs.frp_viirs)
sls_mean = np.mean(sls_vs_viirs.frp_sls - sls_vs_viirs.frp_viirs)
ats_sd = np.std(ats_vs_viirs.frp_ats - ats_vs_viirs.frp_viirs)
sls_sd = np.std(sls_vs_viirs.frp_sls - sls_vs_viirs.frp_viirs)
# # adjust sls data
adjusted_sls = sls_vs_viirs.frp_sls.values*1.11
print
#print 'sls scaling factor: ', 1/a
print 'adjusted sls median:', np.median(adjusted_sls - sls_vs_viirs.frp_viirs )
print 'adjusted sls mean:', np.mean(adjusted_sls - sls_vs_viirs.frp_viirs )
print 'adjusted sls sd:', np.std(adjusted_sls - sls_vs_viirs.frp_viirs)
textstr0 = '$R^2=%.3f$\n Bias=%.2f\n $\sigma$=%.2f\n Samples$=%.0f$' % (r_value0**2, ats_mean, ats_sd, ats_vs_viirs.frp_viirs.shape[0])
textstr1 = '$R^2=%.3f$\n Bias=%.2f\n $\sigma$=%.2f\n Samples$=%.0f$' % (r_value1**2, sls_mean, sls_sd, sls_vs_viirs.frp_viirs.shape[0])
props = dict(boxstyle='round', facecolor='white', alpha=0.5)
ax0.text(0.05, 0.95, textstr0, transform=ax0.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
ax1.text(0.05, 0.95, textstr1, transform=ax1.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
# do sampling map
# xlims = [(-180, 180), (-105, -87), (4, 9), (46, 56), (65, 82), (106, 125)]
# ylims = [(-90, 90), (25, 39), (3, 7), (23, 33), (55, 68), (33, 45)]
# pos = [(-102, 40), (-2, -1), (39, 26), (83, 62), (113, 47)]
ax2.set_xlim((min_lon, max_lon))
ax2.set_ylim((min_lat, max_lat))
ax2.coastlines()
gl = ax2.gridlines(crs=ccrs.PlateCarree(), draw_labels=True,
linewidth=1, color='gray', alpha=0.5, linestyle='--')
ax2.plot(sls_vs_viirs.lons, sls_vs_viirs.lats, 'rx', alpha=0.5, markersize=10, label='SLSTR & VIIRS')
ax2.plot(ats_vs_viirs.lons, ats_vs_viirs.lats, 'b.', alpha=0.2, markersize=10, label='AATSR & VIIRS')
ax2.arrow(58, 30, 0, 2, head_width=0.5, transform=ccrs.PlateCarree())
ax2.text(57.65, 33, 'N', transform=ccrs.PlateCarree(), fontsize=14)
ax2.legend(prop={'size': 15})
scale_bar_left(ax2, bars=1, length=250, location=(0.05, 0.25), linewidth=3)
plt.savefig(os.path.join(output_path, 'viirs_vs_ats_iraq.png'), bbox_inches='tight', dpi=600)
plt.show()
In [106]:
# merge on the persistent sites to reduce the viirs data
viirs_got_sites = viirs_got_hotspot_df.copy()
viirs_got_sites['lats_01'] = round_to_nearest_01_degree(viirs_got_sites.lats)
viirs_got_sites['lons_01'] = round_to_nearest_01_degree(viirs_got_sites.lons)
mask_got = ((viirs_got_sites.lats >= 5) & (viirs_got_sites.lats <= 11) &
(viirs_got_sites.lons >= 100) & (viirs_got_sites.lons <= 108))
viirs_got_sites = viirs_got_sites[mask_got]
viirs_libya_sites = viirs_libya_hotspot_df.copy()
viirs_libya_sites['lats_01'] = round_to_nearest_01_degree(viirs_libya_sites.lats)
viirs_libya_sites['lons_01'] = round_to_nearest_01_degree(viirs_libya_sites.lons)
mask_libya = ((viirs_libya_sites.lats >= 27) & (viirs_libya_sites.lats <= 34) &
(viirs_libya_sites.lons >= 2) & (viirs_libya_sites.lons <= 12))
viirs_libya_sites = viirs_libya_sites[mask_libya]
viirs_oman_sites = viirs_oman_hotspot_df.copy()
viirs_oman_sites['lats_01'] = round_to_nearest_01_degree(viirs_oman_sites.lats)
viirs_oman_sites['lons_01'] = round_to_nearest_01_degree(viirs_oman_sites.lons)
mask_oman = ((viirs_oman_sites.lats >= 16) & (viirs_oman_sites.lats <= 22) &
(viirs_oman_sites.lons >= 51) & (viirs_oman_sites.lons <= 60))
viirs_oman_sites = viirs_oman_sites[mask_oman]
# load in comparison sls sites
sls_sites = sls_hotspot_df_0317.copy()
sls_sites['lats_01'] = round_to_nearest_01_degree(sls_sites.lats)
sls_sites['lons_01'] = round_to_nearest_01_degree(sls_sites.lons)
In [107]:
# now group the clusters by time stamp and sum frp for each time stamp
to_group = ['year', 'month', 'day', 'sensor', 'lats_01', 'lons_01']
to_agg = {'lats': np.mean, 'lons':np.mean, 'frp': np.sum}
viirs_got_grouped = viirs_got_sites.groupby(to_group, as_index=False).agg(to_agg)
viirs_libya_grouped = viirs_libya_sites.groupby(to_group, as_index=False).agg(to_agg)
viirs_oman_grouped = viirs_oman_sites.groupby(to_group, as_index=False).agg(to_agg)
sls_sites_grouped = sls_sites.groupby(to_group, as_index=False).agg(to_agg)
# now get the typical behaviour for each cluster over the year and month
to_group = ['year', 'month', 'sensor', 'lats_01', 'lons_01']
to_agg = {'lats': np.mean, 'lons':np.mean, 'frp': np.mean}
viirs_got_means = viirs_got_grouped.groupby(to_group, as_index=False).agg(to_agg)
viirs_libya_means = viirs_libya_grouped.groupby(to_group, as_index=False).agg(to_agg)
viirs_oman_means = viirs_oman_grouped.groupby(to_group, as_index=False).agg(to_agg)
sls_sites_means = sls_sites_grouped.groupby(to_group, as_index=False).agg(to_agg)
In [108]:
# now merge on year and cluster
viirs_got_means.rename(columns={'frp': 'frp_viirs', }, inplace=True)
viirs_libya_means.rename(columns={'frp': 'frp_viirs'}, inplace=True)
viirs_oman_means.rename(columns={'frp': 'frp_viirs'}, inplace=True)
sls_sites_means.rename(columns={'frp': 'frp_sls', 'frp_std': 'frp_std_sls'}, inplace=True)
on = ['year', 'lats_01', 'lons_01']
cols_sls = ['year', 'lats_01', 'lons_01', 'frp_sls']
cols_viirs = ['year', 'lats_01', 'lons_01', 'frp_viirs', 'lats', 'lons']
sls_vs_viirs_got = pd.merge(viirs_got_means[cols_viirs], sls_sites_means[cols_sls], on=on)
sls_vs_viirs_libya = pd.merge(viirs_libya_means[cols_viirs], sls_sites_means[cols_sls], on=on)
sls_vs_viirs_oman = pd.merge(viirs_oman_means[cols_viirs], sls_sites_means[cols_sls], on=on)
In [109]:
def rmse(predictions, targets):
return np.sqrt(((predictions - targets) ** 2).mean())
plt.close('all')
fig = plt.figure(figsize=(18, 5))
ax0 = plt.subplot(131)
ax1 = plt.subplot(132)
ax2 = plt.subplot(133)
h = ax0.hexbin(sls_vs_viirs_got.frp_viirs, sls_vs_viirs_got.frp_sls, gridsize=100, mincnt=1,
cmap='Blues', linewidths=0.5, edgecolors='k',extent=[-5, 50, -5, 50])
cbar = plt.colorbar(mappable=h, ax=ax0).set_label('Sample Count', fontsize=15)
h = ax1.hexbin(sls_vs_viirs_libya.frp_viirs, sls_vs_viirs_libya.frp_sls, gridsize=45, mincnt=1,
cmap='Reds', linewidths=0.5, edgecolors='k', extent=[-5, 50, -5, 50])
cbar = plt.colorbar(mappable=h, ax=ax1).set_label('Sample Count', fontsize=15)
h = ax2.hexbin(sls_vs_viirs_oman.frp_viirs, sls_vs_viirs_oman.frp_sls, gridsize=45, mincnt=1,
cmap='Greys', linewidths=0.5, edgecolors='k', extent=[-5, 50, -5, 50])
cbar = plt.colorbar(mappable=h, ax=ax2).set_label('Sample Count', fontsize=15)
# sns.regplot(ats_vs_viirs.frp_viirs, ats_vs_viirs.frp_ats, ci=95, color='k', ax=ax0, scatter=False)
# sns.regplot(sls_vs_viirs.frp_viirs, sls_vs_viirs.frp_sls, ci=95, color='k', ax=ax1, scatter=False)
ax0.text(0.90, 0.02, '(a)', transform=ax0.transAxes, fontsize=14)
ax1.text(0.90, 0.02, '(b)', transform=ax1.transAxes, fontsize=14)
ax2.text(0.90, 0.02, '(c)', transform=ax2.transAxes, fontsize=14)
# set limits
ax0.plot((0,200), (0,200), color='k', linestyle='-', linewidth=1)
ax1.plot((0,200), (0,200), color='k', linestyle='-', linewidth=1)
ax2.plot((0,200), (0,200), color='k', linestyle='-', linewidth=1)
ax0.set_xlim((0,20))
ax1.set_xlim((0,50))
ax2.set_xlim((0,30))
ax0.set_ylim((0,20))
ax1.set_ylim((0,50))
ax2.set_ylim((0,30))
ax0.set(ylabel='SLS Mean Cluster FRP(MW)', xlabel='VIIRS Mean Cluster FRP (MW)')
ax0.yaxis.label.set_size(12)
ax0.xaxis.label.set_size(12)
ax1.set(xlabel='VIIRS Mean Cluster FRP (MW)')
ax1.xaxis.label.set_size(12)
ax2.set(xlabel='VIIRS Mean Cluster FRP (MW)')
ax2.xaxis.label.set_size(12)
slope0, intercept0, r_value0, _, _ = scipy.stats.linregress(sls_vs_viirs_got.frp_viirs,
sls_vs_viirs_got.frp_sls)
slope1, intercept1, r_value1, _, _ = scipy.stats.linregress(sls_vs_viirs_libya.frp_viirs,
sls_vs_viirs_libya.frp_sls)
slope2, intercept2, r_value2, _, _ = scipy.stats.linregress(sls_vs_viirs_oman.frp_viirs,
sls_vs_viirs_oman.frp_sls)
# zero intercept reg
# x = sls_vs_viirs.frp_viirs.values[:,np.newaxis]
# y = sls_vs_viirs.frp_sls.values
# a, _, _, _ = np.linalg.lstsq(x, y)
got_median = np.median(sls_vs_viirs_got.frp_viirs - sls_vs_viirs_got.frp_sls)
libya_median = np.median(sls_vs_viirs_libya.frp_viirs - sls_vs_viirs_libya.frp_sls)
oman_median = np.median(sls_vs_viirs_oman.frp_viirs - sls_vs_viirs_oman.frp_sls)
print 'got median:', got_median
print 'libya median:', libya_median
print 'oman median:', oman_median
got_mean = np.mean(sls_vs_viirs_got.frp_viirs - sls_vs_viirs_got.frp_sls)
libya_mean = np.mean(sls_vs_viirs_libya.frp_viirs - sls_vs_viirs_libya.frp_sls)
oman_mean = np.mean(sls_vs_viirs_oman.frp_viirs - sls_vs_viirs_oman.frp_sls)
got_sd = np.std(sls_vs_viirs_got.frp_viirs - sls_vs_viirs_got.frp_sls)
libya_sd = np.std(sls_vs_viirs_libya.frp_viirs - sls_vs_viirs_libya.frp_sls)
oman_sd = np.std(sls_vs_viirs_oman.frp_viirs - sls_vs_viirs_oman.frp_sls)
textstr0 = '$R^2=%.3f$\n Bias=%.2f\n $\sigma$=%.2f\n Samples$=%.0f$' % (r_value0**2, got_mean, got_sd, sls_vs_viirs_got.frp_viirs.shape[0])
textstr1 = '$R^2=%.3f$\n Bias=%.2f\n $\sigma$=%.2f\n Samples$=%.0f$' % (r_value1**2, libya_mean, libya_sd, sls_vs_viirs_libya.frp_viirs.shape[0])
textstr2 = '$R^2=%.3f$\n Bias=%.2f\n $\sigma$=%.2f\n Samples$=%.0f$' % (r_value2**2, oman_mean, oman_sd, sls_vs_viirs_oman.frp_viirs.shape[0])
props = dict(boxstyle='round', facecolor='white', alpha=0.5)
ax0.text(0.05, 0.95, textstr0, transform=ax0.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
ax1.text(0.05, 0.95, textstr1, transform=ax1.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
ax2.text(0.05, 0.95, textstr2, transform=ax2.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
#plt.savefig(os.path.join(output_path, 'viirs_vs_ats_iraq.png'), bbox_inches='tight', dpi=600)
plt.show()
In [110]:
ats_grouped_flares = ats_flare_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'frp': np.mean})
print (ats_grouped_flares.frp < 1).sum() * 1.0/ ats_grouped_flares.frp.shape[0] * 100
sls_grouped_flares = sls_flare_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'frp': np.mean})
print (sls_grouped_flares.frp < 1).sum() * 1.0/ sls_grouped_flares.frp.shape[0] * 100
In [111]:
# For RAL to determine small persistent flaring sites
sls_flare_positions = sls_flare_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'frp': np.mean, 'sample_counts_flare':np.sum, 'lats':np.mean, 'lons':np.mean})
sls_flare_positions_subset = sls_flare_positions[(sls_flare_positions.frp < 1.5) & (sls_flare_positions.sample_counts_flare > 20)]
fig = plt.figure(figsize=(12, 5))
ax = plt.subplot(111, projection=ccrs.PlateCarree())
ax.coastlines()
gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True,
linewidth=1, color='gray', alpha=0.5, linestyle='--')
ax.plot(sls_flare_positions_subset.lons, sls_flare_positions_subset.lats, 'r.', alpha=1, markersize=4, label='> 20 obs & < 1.5 MW')
ax.legend(prop={'size': 15})
plt.savefig('slstr_persistent_flares_for_ral.png', bbox_inches='tight', dpi=600)
sls_flare_positions_subset.to_csv('persistent_slstr_flare_locations_lt1_5mw_gt20obs.csv')
First restrict the SLSTR data to a single annum. Currently we have contiguous data from June 2017 through May 2018, so let's set this as the annum that we use. We think this is justifiable as it is the most up-to-date dataset and we are using it only as a demonstration; if not, we can obtain and process all of the data, requesting it from CEMS.
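A minimal sketch of how such an annum subset could be built, assuming the year and month columns used elsewhere in this notebook (the cell that actually creates sls_flare_df_annum appears earlier in the notebook and is not repeated here):
In [ ]:
# sketch only: restrict the SLSTR flare and sampling dataframes to the June 2017 - May 2018 annum
annum_mask = ((sls_flare_df.year == 2017) & (sls_flare_df.month >= 6)) | ((sls_flare_df.year == 2018) & (sls_flare_df.month <= 5))
sls_flare_df_annum_sketch = sls_flare_df[annum_mask].copy()
sampling_mask = ((sls_flare_sampling_df.year == 2017) & (sls_flare_sampling_df.month >= 6)) | ((sls_flare_sampling_df.year == 2018) & (sls_flare_sampling_df.month <= 5))
sls_flare_sampling_df_annum_sketch = sls_flare_sampling_df[sampling_mask].copy()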
In [394]:
# adjust SLSTR year
sls_flare_df_annum.loc[:, 'year'] = 2018
sls_flare_sampling_df_annum.loc[:, 'year'] = 2018
In [395]:
def counts_by_country(ats_flare_df, sls_flare_df, ats_countries_df, sls_countries_df):
flare_df = pd.concat([ats_flare_df, sls_flare_df])
countries_df = pd.concat([ats_countries_df, sls_countries_df])
# rename years where we have data from both sensors (1996 AT1 & AT2; 2002 AT2 & ATS )
flare_df.loc[flare_df.year == 1996, 'sensor'] = 'AT1 & AT2'
flare_df.loc[flare_df.year == 2002, 'sensor'] = 'AT2 & ATS'
# get unique countries
countries_df.drop_duplicates(['countries', 'lats_arcmin', 'lons_arcmin'], inplace=True)
# get unique annual flares
annual_unique_flares = flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin', 'year'])[['lats_arcmin', 'lons_arcmin', 'year']]
annual_unique_flares_by_sensor = flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin', 'year', 'sensor'])[['lats_arcmin', 'lons_arcmin', 'year', 'sensor']]
# merge countries
annual_unique_flares = annual_unique_flares.merge(countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
annual_unique_flares_by_sensor = annual_unique_flares_by_sensor.merge(countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
# add active flares and flare locations columns
annual_unique_flares['active_flare_locations'] = 1
annual_unique_flares_by_sensor['active_flare_locations'] = 1
# now group
grouped_annual_unique_flares = annual_unique_flares.groupby(['year', 'countries'], as_index=False).agg(
{'active_flare_locations': np.sum})
grouped_annual_unique_flares_by_sensor = annual_unique_flares_by_sensor.groupby(['year', 'countries', 'sensor'], as_index=False).agg(
{'active_flare_locations': np.sum})
# divide by 1000 (to get counts in 000s)
grouped_annual_unique_flares['active_flare_locations'] = grouped_annual_unique_flares.active_flare_locations /1000
grouped_annual_unique_flares_by_sensor['active_flare_locations'] = grouped_annual_unique_flares_by_sensor.active_flare_locations /1000
return grouped_annual_unique_flares, grouped_annual_unique_flares_by_sensor
In [396]:
annual_flare_activity_df, annual_flare_activity_by_sensor_df = counts_by_country(ats_flare_df,
sls_flare_df_annum,
ats_countries_df,
sls_countries_df)
In [397]:
#for yr in [1991, 2012]:
# annual_flare_activity_by_sensor_df = annual_flare_activity_by_sensor_df[annual_flare_activity_by_sensor_df.year != yr]
# get the global annual FRP values
global_activity = annual_flare_activity_by_sensor_df.groupby(['sensor', 'year'], as_index=False).agg({'active_flare_locations': np.sum})
global_activity = global_activity.pivot('year', 'sensor', 'active_flare_locations')
global_activity.columns.name = None
global_activity.rename(columns={'at1': 'AT1', 'at2':'AT2', 'ats':'ATS', 'sls':'SLS'}, inplace=True)
global_activity = global_activity[['AT1', 'AT1 & AT2', 'AT2', 'AT2 & ATS', 'ATS', 'SLS']]
to_group = ['countries', 'year']
to_agg = {'active_flare_locations': np.sum}
screened_countries_activity_df = annual_flare_activity_by_sensor_df.groupby(to_group, as_index=False).agg(to_agg)
# # find the top n flaring countries
n_countries = 30
top_flaring_countries = screened_countries_activity_df.groupby('countries').agg({'active_flare_locations': np.sum}).sort_values('active_flare_locations', ascending=False).head(n_countries).reset_index()
# # reduce to the top countries
top_country_activity = screened_countries_activity_df.merge(top_flaring_countries[['countries']], on=['countries'])
# pivoted
pivoted_df = top_country_activity.pivot('year', 'countries', 'active_flare_locations')
df_list = []
for c in top_flaring_countries.countries.values:
df_list.append(pivoted_df[c])
top_country_activity = pd.concat(df_list, axis=1)
In [398]:
pc_plot_counts = pd.DataFrame(top_country_activity.values / annual_flare_activity_by_sensor_df.groupby(['year'], as_index=True).agg({'active_flare_locations': np.sum}).values * 100,
columns=top_country_activity.T.index,
index=top_country_activity.index)
In [399]:
# sort out diff plot showing only years with reasonable sampling
diff_plot = pc_plot_counts.copy(deep=True)
diff_plot.drop([1991,1992,2012], axis=0, inplace=True)
diff_plot = diff_plot - diff_plot.shift()
diff_plot.loc[1991] = np.nan
diff_plot.loc[1992] = np.nan
diff_plot.loc[2012] = np.nan
diff_plot.sort_index(inplace=True)
In [400]:
plt.close('all')
#fig, ax = plt.figure(figsize=(18,7))
fig, (ax0, ax1, ax2) = plt.subplots(nrows=3, ncols=1,
gridspec_kw={'height_ratios': [2, 7.5, 7.5]},
figsize=(15, 22))
sns.heatmap(global_activity.T, linewidths=1,
robust=True, annot=True,
annot_kws={'fontsize':13},
fmt='.1f',
cbar=False,
cmap='Greys_r',
vmax=18,
ax=ax0)
ax0.set(xlabel='', ylabel="a) Global Count ($x10^3$)")
ax0.yaxis.label.set_size(20)
ax0.set_xticklabels(ax0.get_xticklabels(), fontsize=12)
ax0.set_yticklabels(ax0.get_yticklabels(), fontsize=12, rotation=0)
sns.heatmap(pc_plot_counts.T, linewidths=1,
robust=True, annot=True,
annot_kws={'fontsize':13},
fmt='.1f',
cbar=False,
cmap='viridis',
vmax = 22,
ax=ax1)
ax1.set(xlabel='', ylabel='b) Fraction of Global Count (%)')
ax1.yaxis.label.set_size(20)
ax1.set_yticklabels(ax1.get_yticklabels(), fontsize=12)
ax1.set_xticklabels(ax1.get_xticklabels(), fontsize=12)
sns.heatmap(diff_plot.T, linewidths=1,
robust=True, annot=True,
annot_kws={'fontsize':13},
fmt='.1f',
cbar=False,
cmap='coolwarm',
vmax=3, vmin=-3,
ax=ax2)
ax2.set(xlabel='Year', ylabel='c) Annual Change in Fraction of Global Count (%)')
ax2.yaxis.label.set_size(20)
ax2.xaxis.label.set_size(20)
ax2.set_xticklabels(ax1.get_xticklabels(), fontsize=12)
ax2.set_yticklabels(ax1.get_yticklabels(), fontsize=12)
plt.savefig(os.path.join(output_path, 'flare_counts_analysis.png'), bbox_inches='tight', dpi=600)
plt.show()
Figure Comments: The global figure shows counts for all sensors (excluding 2016 data for SLS, as there are no flare observations). The country-level analysis uses only the valid non-overlapping years (which are defined in the code), so AT2 1995, AT1 1996, ATS 2002 and AT2 2003 are not used. This is necessary because the geospatial alignment is not exact and we cannot be certain of the total counts if we combine sensors (i.e. we may double count some flares).
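A minimal sketch of the year/sensor screening described above, assuming the ats_flare_df and sls_flare_df_annum dataframes used in the counting function; the actual screening is applied where those dataframes are built:
In [ ]:
# sketch only: drop the overlapping sensor/year combinations listed above before any country-level counting
combined_df = pd.concat([ats_flare_df, sls_flare_df_annum])
overlap = (((combined_df.sensor == 'at2') & (combined_df.year == 1995)) |
((combined_df.sensor == 'at1') & (combined_df.year == 1996)) |
((combined_df.sensor == 'ats') & (combined_df.year == 2002)) |
((combined_df.sensor == 'at2') & (combined_df.year == 2003)))
screened_combined_df = combined_df[~overlap]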
In [401]:
activity_global_pc_change = annual_flare_activity_by_sensor_df.groupby(['sensor', 'year'], as_index=False).agg({'active_flare_locations': np.sum})
activity_global_pc_change = activity_global_pc_change.sort_values('year')
activity_global_pc_change['pc_change'] = (activity_global_pc_change.active_flare_locations / activity_global_pc_change.active_flare_locations.shift() - 1)
activity_global_pc_change.drop('active_flare_locations', axis=1, inplace=True)
activity_global_pc_change = activity_global_pc_change.pivot('year', 'sensor', 'pc_change')
activity_global_pc_change.rename(columns={'at1': 'AT1', 'at2':'AT2', 'ats':'ATS', 'sls':'SLS'}, inplace=True)
activity_global_pc_change = activity_global_pc_change[['AT1', 'AT1 & AT2', 'AT2', 'AT2 & ATS', 'ATS', 'SLS']]
activity_country_pc_change = top_country_activity.copy()
activity_country_pc_change = (activity_country_pc_change / activity_country_pc_change.shift() - 1)
for yr in [1992]:
activity_global_pc_change = activity_global_pc_change[activity_global_pc_change.index != yr]
activity_country_pc_change = activity_country_pc_change[activity_country_pc_change.index != yr]
In [402]:
plt.close('all')
#fig, ax = plt.figure(figsize=(18,7))
fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1,
gridspec_kw={'height_ratios': [2, 7.5]},
figsize=(15, 12))
sns.heatmap(activity_global_pc_change.T*100, linewidths=1,
robust=True, annot=True,
fmt='.0f', annot_kws={'fontsize':13},
cbar=False,
cmap='coolwarm',
vmax=10, vmin=-10,
ax=ax0)
ax0.set(xlabel='', ylabel='Global')
ax0.yaxis.label.set_size(20)
ax0.set_xticklabels(ax0.get_xticklabels(), fontsize=12)
ax0.set_yticklabels(ax0.get_yticklabels(), fontsize=12, rotation=0)
sns.heatmap(activity_country_pc_change.T*100, linewidths=1,
robust=True, annot=True,
fmt='.0f', annot_kws={'fontsize':13},
cbar=False,
cmap='coolwarm',
vmax=50, vmin=-50,
ax=ax1)
ax1.set(xlabel='Year', ylabel='Country')
ax1.yaxis.label.set_size(20)
ax1.xaxis.label.set_size(20)
ax1.set_xticklabels(ax1.get_xticklabels(), fontsize=12)
ax1.set_yticklabels(ax1.get_yticklabels(), fontsize=12)
plt.savefig(os.path.join(output_path, 'ats_counts_change.png'), bbox_inches='tight', dpi=600)
plt.show()
First group each arcminute grid cell for each year in the flare dataframe, aggregating the fire radiative power and the total number of observations in that year by summing. Do the same for the sampling dataframe (i.e. the number of overpasses of a given flare location).
The cloud-adjusted mean is computed as the sum of the hotspot's FRP for the year divided by the total number of overpasses of the hotspot adjusted for cloud cover, e.g. 6000 [MW] / (120 [Obs.] * (1 - 0.6 [CC])). So we in effect get the mean FRP based on an assumed number of cloud-free observations (this also normalises for variation in sampling). The cloud cover I estimate from an 8 by 8 window around each hotspot, from which I take the ATSR cloud flag. This is then averaged over the year, so I get an annual mean cloud cover estimate for each hotspot.
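As a worked version of the example quoted above (numbers taken directly from the text):
In [ ]:
# worked example of the cloud-adjusted mean FRP using the numbers quoted above
annual_summed_frp = 6000.0 # MW, summed FRP over all detections in the year
n_overpasses = 120 # total overpasses of the grid cell in the year
mean_cloud_cover = 0.6 # annual mean cloud cover for the grid cell
cloud_adjusted_samples = n_overpasses * (1 - mean_cloud_cover) # 48 expected cloud-free overpasses
cloud_adjusted_mean_frp = annual_summed_frp / cloud_adjusted_samples # 125 MW
print cloud_adjusted_mean_frp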
What happens if the "hotspot" is observed to not be there? I.e. a location previously seen to be a flare is cloud free, but now there is nothing there (the flare is turned off for a while; maybe it is re-detected a few weeks or months later). Do these "zero" FRP values contribute to the mean FRP or not? It seems not, from your calculation. If they do not, then multiplying the cloud-adjusted mean FRP by the number of seconds in a year will not, I think, give the correct FRE, because you are counting the flare as burning at its mean FRP value even when it has been observed as not burning. When there has been a cloud-free observation of the flare's location and it is confirmed that there is nothing there, that time period should not contribute to the FRE calculation, or the mean FRP should include the zero FRP observed for that observation.
The number of observations includes not only active flaring, but all overpasses of the flaring location during the year. So in the above example the 6000 MW could come from 50 of the 120 overpasses, but all 120 (including the 70 'zero' observations) are used for the mean (and this number is further adjusted for cloud cover). So the calculation does take flare persistence into account.
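A worked version of this example (numbers purely illustrative) mirrors the calculation in the function below, including the clamp that stops the cloud-adjusted count falling below the number of actual detections:
In [ ]:
# illustrative numbers only: 50 active detections totalling 6000 MW, 120 overpasses, 60% mean cloud cover
annual_frp_sum = 6000.0      # MW, summed over the 50 active flare detections
n_detections = 50            # overpasses where the flare was actually detected
n_overpasses = 120           # all overpasses of the flare location in the year
mean_cloud_cover = 0.6
expected_clear_obs = n_overpasses * (1 - mean_cloud_cover)   # 48 expected cloud-free views
expected_clear_obs = max(expected_clear_obs, n_detections)   # clamped to 50, as in frp_by_country()
cloud_adjusted_mean_frp = annual_frp_sum / expected_clear_obs  # 120 MW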
Two different dataframes are created: one with sensor information and one without. The one without sensor information effectively creates a mean value for any overlapping years; this is not ideal, but is still useful for some analyses. The one with sensor information allows screening of year/sensor combinations so that no overlapping information contaminates the results.
In [403]:
def frp_by_country(flare_df, sampling_df, countries_df):
# get rid of the corrupted ATS orbit (see Malaysia/Indonesia analysis below for details)
bad_orbit_flare = (flare_df.year == 2011) & (flare_df.month == 10) & (flare_df.day == 10) & (flare_df.hhmm == 1240)
bad_orbit_sampling = (sampling_df.year == 2011) & (sampling_df.month == 10) & (sampling_df.day == 10) & (sampling_df.hhmm == 1240)
# apply bad orbit mask
print flare_df.shape
flare_df = flare_df.loc[~bad_orbit_flare, :]
print flare_df.shape
sampling_df = sampling_df.loc[~bad_orbit_sampling, :]
# compute the annual mean for each flare
to_group = ['lats_arcmin', 'lons_arcmin', 'year']
to_agg = {'frp': np.sum,
'sample_counts_flare': np.sum,
'lats': np.mean,
'lons': np.mean}
annual_summed_flare_df = flare_df.groupby(to_group, as_index=False).agg(to_agg)
# get the cloud cover adjusted sampling
to_group = ['lats_arcmin', 'lons_arcmin', 'year']
to_agg = {'cloud_cover': np.mean,
'sample_counts_all': np.sum}
annual_sample_df = sampling_df.groupby(to_group, as_index=False).agg(to_agg)
print annual_sample_df.head()
# merge and get cloud-adjusted sampling (i.e. the expected number of observations)
annual_frp_df = pd.merge(annual_summed_flare_df, annual_sample_df, on=to_group)
annual_frp_df['cloud_adjusted_sample_counts'] = annual_frp_df.sample_counts_all * (1-annual_frp_df.cloud_cover)
# if expected less than actual, then set to actual number of observations
mask = annual_frp_df['cloud_adjusted_sample_counts'] < annual_frp_df['sample_counts_flare']
annual_frp_df.loc[mask, 'cloud_adjusted_sample_counts'] = annual_frp_df.loc[mask, 'sample_counts_flare']
# compute mean frp
annual_frp_df['cloud_adjusted_mean_frp'] = annual_frp_df.frp / annual_frp_df.cloud_adjusted_sample_counts
# merge countries
annual_frp_df = annual_frp_df.merge(countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
# sum on a per country basis
grouped_annual_frp = annual_frp_df.groupby(['year', 'countries'], as_index=False).agg({'cloud_adjusted_mean_frp': np.sum})
# ---------------------------------------
# Same again but with sensor information
# ---------------------------------------
# rename years where we have data from both sensors (1996 AT1 & AT2; 2002 AT2 & ATS)
flare_df.loc[flare_df.year == 1996, 'sensor'] = 'AT1 & AT2'
flare_df.loc[flare_df.year == 2002, 'sensor'] = 'AT2 & ATS'
sampling_df.loc[sampling_df.year == 1996, 'sensor'] = 'AT1 & AT2'
sampling_df.loc[sampling_df.year == 2002, 'sensor'] = 'AT2 & ATS'
# compute the annual mean for each flare
to_group = ['lats_arcmin', 'lons_arcmin', 'year', 'sensor']
to_agg = {'frp': np.sum,
'sample_counts_flare': np.sum,
'lats': np.mean,
'lons': np.mean}
annual_summed_flare_df = flare_df.groupby(to_group, as_index=False).agg(to_agg)
# get the cloud cover adjusted sampling
to_group = ['lats_arcmin', 'lons_arcmin', 'year', 'sensor']
to_agg = {'cloud_cover': np.mean,
'sample_counts_all': np.sum}
annual_sample_df = sampling_df.groupby(to_group, as_index=False).agg(to_agg)
# merge and get cloud-adjusted sampling (i.e. the expected number of observations)
annual_frp_df = pd.merge(annual_summed_flare_df, annual_sample_df, on=to_group)
annual_frp_df['cloud_adjusted_sample_counts'] = annual_frp_df.sample_counts_all * (1-annual_frp_df.cloud_cover)
# if expected less than actual, then set to actual number of observations
mask = annual_frp_df['cloud_adjusted_sample_counts'] < annual_frp_df['sample_counts_flare']
annual_frp_df.loc[mask, 'cloud_adjusted_sample_counts'] = annual_frp_df.loc[mask, 'sample_counts_flare']
# compute mean frp
annual_frp_df['cloud_adjusted_mean_frp'] = annual_frp_df.frp / annual_frp_df.cloud_adjusted_sample_counts
# merge countries
annual_frp_df = annual_frp_df.merge(countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
# sum on a per country basis
grouped_annual_frp_by_sensor = annual_frp_df.groupby(['year', 'countries', 'sensor'], as_index=False).agg({'cloud_adjusted_mean_frp': np.sum})
return grouped_annual_frp, grouped_annual_frp_by_sensor, annual_frp_df
In [404]:
ats_annual_frp_by_country, ats_annual_frp_by_country_with_sensor, ats_annual_frp_df = frp_by_country(ats_flare_df,
ats_flare_sampling_df,
ats_countries_df)
sls_annual_frp_by_country, sls_annual_frp_by_country_with_sensor, sls_annual_frp_df = frp_by_country(sls_flare_df_annum,
sls_flare_sampling_df_annum,
sls_countries_df)
In [801]:
sls_annual_frp_df.head()
Out[801]:
In [805]:
sls_annual_frp_df[sls_annual_frp_df.countries == "United States"].cloud_adjusted_sample_counts.mean()
Out[805]:
We have cloud-adjusted annual FRP for each valid flare. We can use this information to create an "adjusted" monthly visualisation in which the yearly FRP is distributed over the observations, based on the observations and their weighting. This doesn't fully represent the true activity of the flare, but rather the assumed activity of the flare if it were operating continuously, which is not the case. But given the observational limitations we cannot fully determine up-time, and this estimate is the best we can get.
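A minimal toy illustration of the weighting for a single flare (assumed numbers only, not taken from the data):
In [ ]:
# toy example: a flare seen in three months of the year
toy = pd.DataFrame({'month': [3, 7, 11], 'frp': [10.0, 30.0, 60.0]})
toy['frp_weight'] = toy.frp / toy.frp.sum()           # 0.1, 0.3, 0.6
toy_yearly_fre = 5000.0                               # assumed cloud-adjusted annual FRE for this flare
toy['monthly_fre'] = toy.frp_weight * toy_yearly_fre  # 500, 1500, 3000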
In [406]:
# Get monthly mean FRP for each flare (not normalised)
ats_monthly_frp_unnormalised = ats_flare_df.groupby(['lats_arcmin', 'lons_arcmin', 'year', 'month'], as_index=False).agg({'lats': np.mean, 'lons':np.mean, 'frp':np.mean})
# Sum monthly means to get yearly total of means and rename and merge onto monthly df
ats_yearly_frp_unnormalised = ats_monthly_frp_unnormalised.groupby(['lats_arcmin', 'lons_arcmin', 'year'], as_index=False).agg({'frp': np.sum})
ats_yearly_frp_unnormalised.rename(columns={'frp':'annual_frp'}, inplace=True)
ats_monthly_frp_unnormalised = ats_monthly_frp_unnormalised.merge(ats_yearly_frp_unnormalised[['annual_frp', 'year', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin', 'year'])
# determine the contribution of each month to the yearly total to generate a weight
ats_monthly_frp_unnormalised['frp_weight'] = ats_monthly_frp_unnormalised.frp / ats_monthly_frp_unnormalised.annual_frp
# Determine the total FRE for each flare from the normalised dataset
ats_annual_frp_df['yearly_fre'] = ats_annual_frp_df.cloud_adjusted_mean_frp * 3.154e+7 / 1000000 # multiply by seconds in one year to get FRE, then divide by a million to get MJ
# Merge the FRE annual totals onto the monthly dataset and distribute across the months using the weights
ats_monthly_frp_unnormalised = ats_monthly_frp_unnormalised.merge(ats_annual_frp_df[['yearly_fre', 'year', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin', 'year'])
ats_monthly_frp_unnormalised['monthly_fre'] = ats_monthly_frp_unnormalised.frp_weight * ats_monthly_frp_unnormalised.yearly_fre
In [666]:
# assess difference in sampling between sensors
annual_sampling = ats_hotspot_sampling_df.groupby(['lats_arcmin', 'lons_arcmin', 'year'], as_index=False).agg({'sample_counts_all':np.sum,
'sensor':np.unique})
In [667]:
annual_sampling = annual_sampling[~((annual_sampling.year == 1996) | (annual_sampling.year==2002))]
In [669]:
sensor_sampling = annual_sampling.groupby(['lats_arcmin', 'lons_arcmin', 'sensor'], as_index=False).agg({'sample_counts_all':np.median})
In [681]:
at1_sampling = sensor_sampling[sensor_sampling.sensor == 'at1']
at2_sampling = sensor_sampling[sensor_sampling.sensor == 'at2']
ats_sampling = sensor_sampling[sensor_sampling.sensor == 'ats']
In [692]:
at1_sampling.rename(columns={"sample_counts_all":"sample_counts_at1"}, inplace=True)
at2_sampling.rename(columns={"sample_counts_all":"sample_counts_at2"}, inplace=True)
ats_sampling.rename(columns={"sample_counts_all":"sample_counts_ats"}, inplace=True)
In [705]:
at1_ats_sampling = at1_sampling.merge(ats_sampling, on=['lats_arcmin', 'lons_arcmin'])
at1_at2_sampling = at1_sampling.merge(at2_sampling, on=['lats_arcmin', 'lons_arcmin'])
at2_ats_sampling = at2_sampling.merge(ats_sampling, on=['lats_arcmin', 'lons_arcmin'])
In [707]:
plt.plot(at1_ats_sampling.sample_counts_at1, at1_ats_sampling.sample_counts_ats, 'r.')
plt.show()
plt.plot(at1_at2_sampling.sample_counts_at1, at1_at2_sampling.sample_counts_at2, 'r.')
plt.show()
plt.plot(at2_ats_sampling.sample_counts_at2, at2_ats_sampling.sample_counts_ats, 'r.')
plt.show()
In [708]:
odd_points = at1_ats_sampling[(at1_ats_sampling.sample_counts_at1 < 100) & (at1_ats_sampling.sample_counts_ats > 100)]
In [712]:
plt.plot(odd_points.lons_arcmin/100, odd_points.lats_arcmin/100, 'r.')
plt.show()
In [713]:
odd_points = at2_ats_sampling[(at2_ats_sampling.sample_counts_at2 < 100) & (at2_ats_sampling.sample_counts_ats > 100)]
In [725]:
plt.plot(odd_points.lons_arcmin/100, odd_points.lats_arcmin/100, 'r.')
plt.show()
In [638]:
# generate plots for gif generation ATS
years = np.arange(1994, 2012, 1)
months = np.arange(1,13, 1)
bin_x = np.arange(-180, 182, 1)
bin_y = np.arange(-90, 92, 1)
meshed_x, meshed_y = np.meshgrid(bin_x, bin_y)
meshed_x = meshed_x[:-1,:-1] +0.5
meshed_y = meshed_y[:-1,:-1] +0.5
for y in years:
for m in months:
plt.close('all')
sub_df = ats_hotspot_sampling_df[(ats_hotspot_sampling_df.year == y) &
(ats_hotspot_sampling_df.month == m)]
try:
binned_data = stats.binned_statistic_2d(sub_df.lons_arcmin/100, sub_df.lats_arcmin/100,
np.ones(sub_df.lons_arcmin.size), 'sum',
bins=[bin_x, bin_y]).statistic.T
except:
continue
mask = binned_data ==0
binned_data = np.log10(binned_data)
binned_data = np.ma.masked_array(binned_data, mask)
# set up figure
plt.close('all')
fig = plt.figure(figsize=(20,15))
fig.subplots_adjust(hspace=0.001, wspace=0.1)
ax1 = plt.subplot(1, 1, 1, projection=ccrs.EqualEarth())
ax_list = [ax1]
for i, ax in enumerate(ax_list):
ax.set_global()
ax.outline_patch.set_edgecolor('black')
ax.coastlines(color='black', linewidth=0.75)
ax.gridlines(color='black', linewidth=0.75, alpha=0.5)
ax1.text(0.94, 0.92, "year: " + str(y), transform=ax1.transAxes, fontsize=25)
ax1.text(0.94, 0.87, "month: " + str(m).zfill(2), transform=ax1.transAxes, fontsize=25)
p = ax1.pcolormesh(meshed_x, meshed_y, binned_data, cmap='Reds', transform=ccrs.PlateCarree(), vmax=3)
cbaxes1 = fig.add_axes([0.315, 0.2, 0.4, 0.02])
cb1 = plt.colorbar(p, cax=cbaxes1, orientation="horizontal")
cb1.set_label('Log Count', fontsize=18)
plt.savefig('figs/sampling/' + str(y)+str(m).zfill(2)+'.png', bbox_inches='tight')
#plt.show()
In [407]:
# generate plots for gif generation ATS
years = np.arange(1994, 2012, 1)
months = np.arange(1,13, 1)
bin_x = np.arange(-180, 182, 2)
bin_y = np.arange(-90, 92, 2)
meshed_x, meshed_y = np.meshgrid(bin_x, bin_y)
meshed_x = meshed_x[:-1,:-1] +1
meshed_y = meshed_y[:-1,:-1] +1
for y in years:
for m in months:
plt.close('all')
sub_df = ats_monthly_frp_unnormalised[(ats_monthly_frp_unnormalised.year == y) &
(ats_monthly_frp_unnormalised.month == m)]
try:
binned_data = stats.binned_statistic_2d(sub_df.lons, sub_df.lats,
sub_df.frp, 'sum',
bins=[bin_x, bin_y])
binned_data = binned_data.statistic.T
except:
continue
mask = binned_data > 0
binned_data = binned_data[mask]
lons = meshed_x[mask]
lats = meshed_y[mask]
# set up figure
plt.close('all')
fig = plt.figure(figsize=(20,15))
fig.subplots_adjust(hspace=0.001, wspace=0.1)
ax1 = plt.subplot(1, 1, 1, projection=ccrs.EqualEarth())
ax_list = [ax1]
for i, ax in enumerate(ax_list):
ax.set_global()
ax.outline_patch.set_edgecolor('black')
ax.coastlines(color='black', linewidth=0.75)
ax.gridlines(color='black', linewidth=0.75, alpha=0.5)
ax1.text(0.94, 0.92, "year: " + str(y), transform=ax1.transAxes, fontsize=25)
ax1.text(0.94, 0.87, "month: " + str(m).zfill(2), transform=ax1.transAxes, fontsize=25)
scat = ax1.scatter(lons,lats,transform=ccrs.PlateCarree(),
s=binned_data, lw=0.5,
c=binned_data,
cmap='plasma',
vmax=1000,
alpha=0.65)
cbaxes1 = fig.add_axes([0.315, 0.2, 0.4, 0.02])
cb1 = plt.colorbar(scat, cax=cbaxes1, orientation="horizontal")
cb1.set_label('FRE (MJ)', fontsize=18)
plt.savefig('figs/' + str(y)+str(m).zfill(2)+'.png', bbox_inches='tight')
#plt.show()
In [408]:
# Get monthly mean FRP for each flare (not normalised)
sls_monthly_frp_unnormalised = sls_flare_df.groupby(['lats_arcmin', 'lons_arcmin', 'year', 'month'], as_index=False).agg({'lats': np.mean, 'lons':np.mean, 'frp':np.mean})
# Sum monthly means to get yearly total of means and rename and merge onto monthly df
sls_yearly_frp_unnormalised = sls_monthly_frp_unnormalised.groupby(['lats_arcmin', 'lons_arcmin', 'year'], as_index=False).agg({'frp': np.sum})
sls_yearly_frp_unnormalised.rename(columns={'frp':'annual_frp'}, inplace=True)
sls_monthly_frp_unnormalised = sls_monthly_frp_unnormalised.merge(sls_yearly_frp_unnormalised[['annual_frp', 'year', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin', 'year'])
# determine the contribution of each month to the yearly total to generate a weight
sls_monthly_frp_unnormalised['frp_weight'] = sls_monthly_frp_unnormalised.frp / sls_monthly_frp_unnormalised.annual_frp
# Determine the total FRE for each flare from the normalised dataset
sls_annual_frp_df['yearly_fre'] = sls_annual_frp_df.cloud_adjusted_mean_frp * 3.154e+7 / 1000000 # multiply by seconds in one year to get FRE, then divide by a million to get MJ
# Merge the FRE annual totals onto the monthly dataset and distribute across the months using the weights
sls_monthly_frp_unnormalised = sls_monthly_frp_unnormalised.merge(sls_annual_frp_df[['yearly_fre', 'year', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin', 'year'])
sls_monthly_frp_unnormalised['monthly_fre'] = sls_monthly_frp_unnormalised.frp_weight * sls_monthly_frp_unnormalised.yearly_fre
In [409]:
# generate plots for gif generation SLS
years = np.arange(2016, 2019, 1)
months = np.arange(1,13, 1)
bin_x = np.arange(-180, 182, 2)
bin_y = np.arange(-90, 92, 2)
meshed_x, meshed_y = np.meshgrid(bin_x, bin_y)
meshed_x = meshed_x[:-1,:-1] +1
meshed_y = meshed_y[:-1,:-1] +1
for y in years:
for m in months:
plt.close('all')
sub_df = sls_monthly_frp_unnormalised[(sls_monthly_frp_unnormalised.year == y) &
(sls_monthly_frp_unnormalised.month == m)]
try:
binned_data = stats.binned_statistic_2d(sub_df.lons, sub_df.lats,
sub_df.frp, 'sum',
bins=[bin_x, bin_y])
binned_data = binned_data.statistic.T
except:
continue
mask = binned_data > 0
binned_data = binned_data[mask]
lons = meshed_x[mask]
lats = meshed_y[mask]
# set up figure
plt.close('all')
fig = plt.figure(figsize=(20,15))
fig.subplots_adjust(hspace=0.001, wspace=0.1)
ax1 = plt.subplot(1, 1, 1, projection=ccrs.EqualEarth())
ax_list = [ax1]
for i, ax in enumerate(ax_list):
ax.set_global()
ax.outline_patch.set_edgecolor('black')
ax.coastlines(color='black', linewidth=0.75)
ax.gridlines(color='black', linewidth=0.75, alpha=0.5)
ax1.text(0.94, 0.92, "year: " + str(y), transform=ax1.transAxes, fontsize=25)
ax1.text(0.94, 0.87, "month: " + str(m).zfill(2), transform=ax1.transAxes, fontsize=25)
scat = ax1.scatter(lons,lats,transform=ccrs.PlateCarree(),
s=binned_data, lw=0.5,
c=binned_data,
cmap='plasma',
vmax=1000,
alpha=0.65)
cbaxes1 = fig.add_axes([0.315, 0.2, 0.4, 0.02])
cb1 = plt.colorbar(scat, cax=cbaxes1, orientation="horizontal")
cb1.set_label('FRE (MJ)', fontsize=18)
plt.savefig('figs/' + str(y)+str(m).zfill(2)+'.png', bbox_inches='tight')
#plt.show()
In [410]:
df_with_bcm = ats_annual_frp_by_country_with_sensor.copy()
df_with_bcm = ats_annual_frp_by_country.merge(bcm_df, on=['countries', 'year'])
df_with_bcm.dropna(inplace=True)
# get rid of end years
df_with_bcm = df_with_bcm[df_with_bcm.year != 2012]
df_with_bcm = df_with_bcm[df_with_bcm.year != 1991]
df_with_bcm = df_with_bcm[df_with_bcm.year != 1992]
df_with_bcm = df_with_bcm[df_with_bcm.year != 2001]
df_with_bcm = df_with_bcm[df_with_bcm.year != 2002]
# russia and nigeria
df_with_bcm_rn = df_with_bcm.copy()
df_with_bcm_rn = df_with_bcm_rn[(df_with_bcm_rn.countries == 'Russia') |
(df_with_bcm_rn.countries == 'Nigeria')]
# all other countries
df_with_bcm_not_rn = df_with_bcm.copy()
df_with_bcm_not_rn = df_with_bcm_not_rn[(df_with_bcm_not_rn.countries != 'Russia') &
(df_with_bcm_not_rn.countries != 'Nigeria')]
In [411]:
plt.close('all')
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 9))
h = ax.hexbin(df_with_bcm.cloud_adjusted_mean_frp, df_with_bcm.bcm, gridsize=50, bins='log', mincnt=1,
cmap='rainbow', linewidths=0.5, edgecolors='k')
cbar = plt.colorbar(mappable=h).set_label('Log Sample Count', fontsize=20)
sns.regplot(df_with_bcm_rn.cloud_adjusted_mean_frp, df_with_bcm_rn.bcm, ci=68, color='k', ax=ax, scatter=False)
sns.regplot(df_with_bcm_not_rn.cloud_adjusted_mean_frp, df_with_bcm_not_rn.bcm, ci=68, color='k', ax=ax, scatter=False)
ax.set(ylabel='DMSP Annual Flare Gas Volume (BCM)', xlabel='ATSR Annual Normalised FRP (MW)')
ax.yaxis.label.set_size(20)
ax.xaxis.label.set_size(20)
slope0, intercept0, r_value0, _, _ = scipy.stats.linregress(df_with_bcm_rn.cloud_adjusted_mean_frp,
df_with_bcm_rn.bcm)
slope1, intercept1, r_value1, _, _ = scipy.stats.linregress(df_with_bcm_not_rn.cloud_adjusted_mean_frp,
df_with_bcm_not_rn.bcm)
textstr0 = '$R^2=%.3f$\n$y=%.2fx + %.2f$\n Samples$=%.0f$' % (r_value0**2, slope0, intercept0, df_with_bcm_rn.bcm.shape[0])
textstr1 = '$R^2=%.3f$\n$y=%.2fx + %.2f$\n Samples$=%.0f$' % (r_value1**2, slope1, intercept1, df_with_bcm_not_rn.bcm.shape[0])
props = dict(boxstyle='round', facecolor='white', alpha=0.5)
ax.text(0.6, 0.9, textstr0, transform=ax.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
ax.text(0.7, 0.22, textstr1, transform=ax.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
plt.savefig(os.path.join(output_path, 'dmsp_vs_ats.png'), bbox_inches='tight', dpi=600)
plt.show()
In [412]:
# reduce the data to only complete years
ats_with_sensor_df = ats_annual_frp_by_country_with_sensor.copy()
sls_with_sensor_df = sls_annual_frp_by_country_with_sensor.copy()
# for yr in [1991, 2012, 2016]:
# ats_with_sensor_df = ats_with_sensor_df[ats_with_sensor_df.year != yr]
# sls_with_sensor_df = sls_with_sensor_df[sls_with_sensor_df.year != yr]
# combine the dataframes
combined_with_sensor_df = pd.concat([ats_with_sensor_df, sls_with_sensor_df])
# get the global annual FRP values
global_frp = combined_with_sensor_df.groupby(['sensor', 'year'], as_index=False).agg({'cloud_adjusted_mean_frp': np.sum})
global_frp.cloud_adjusted_mean_frp /= 1000 # convert from MW to GW
global_frp = global_frp.pivot('year', 'sensor', 'cloud_adjusted_mean_frp')
global_frp.columns.name = None
global_frp.rename(columns={'at1': 'AT1', 'at2':'AT2', 'ats':'ATS', 'sls':'SLS'}, inplace=True)
global_frp = global_frp[['AT1', 'AT1 & AT2', 'AT2', 'AT2 & ATS', 'ATS', 'SLS']]
# now aggregate to remove sensor information from screened data
to_group = ['countries', 'year']
to_agg = {'cloud_adjusted_mean_frp': np.sum}
combined_df = combined_with_sensor_df.groupby(to_group, as_index=False).agg(to_agg)
combined_df.cloud_adjusted_mean_frp /= 1000
# find the top n flaring countries
n_countries = 30
top_flaring_countries = combined_df.groupby('countries').agg({'cloud_adjusted_mean_frp': np.sum}).sort_values('cloud_adjusted_mean_frp', ascending=False).head(n_countries).reset_index()
# reduce to the top countries
top_flaring_activity = combined_df.merge(top_flaring_countries[['countries']], on=['countries'])
# pivoted
pivoted_df = top_flaring_activity.pivot('year', 'countries', 'cloud_adjusted_mean_frp')
df_list = []
for c in top_flaring_countries.countries.values:
df_list.append(pivoted_df[c])
top_flaring_activity = pd.concat(df_list, axis=1)
In [413]:
plt.close('all')
#fig, ax = plt.figure(figsize=(18,7))
fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1,
gridspec_kw={'height_ratios': [2, 7.5]},
figsize=(15, 12))
sns.heatmap(global_frp.T, linewidths=1,
robust=True, annot=True,
fmt='.1f', annot_kws={'fontsize':13},
cbar=False,
cmap='Greys_r',
ax=ax0)
ax0.set(xlabel='', ylabel='Global')
ax0.yaxis.label.set_size(20)
ax0.set_xticklabels(ax0.get_xticklabels(), fontsize=12)
ax0.set_yticklabels(ax0.get_yticklabels(), fontsize=12, rotation=0)
sns.heatmap(top_flaring_activity.T, linewidths=1,
robust=True, annot=True,
fmt='.2f', annot_kws={'fontsize':13},
cbar=False,
vmin=-0.3, vmax=2,
ax=ax1)
ax1.set(xlabel='Year', ylabel='Country')
ax1.yaxis.label.set_size(20)
ax1.xaxis.label.set_size(20)
ax1.set_xticklabels(ax1.get_xticklabels(), fontsize=12)
ax1.set_yticklabels(ax1.get_yticklabels(), fontsize=12)
plt.savefig(os.path.join(output_path, 'ats_frp.png'), bbox_inches='tight', dpi=600)
plt.show()
In [414]:
# reduce the data to only complete years
ats_with_sensor_df = ats_annual_frp_by_country_with_sensor.copy()
sls_with_sensor_df = sls_annual_frp_by_country_with_sensor.copy()
# for yr in [1991, 2012, 2016]:
# ats_with_sensor_df = ats_with_sensor_df[ats_with_sensor_df.year != yr]
# sls_with_sensor_df = sls_with_sensor_df[sls_with_sensor_df.year != yr]
# combine the dataframes
combined_with_sensor_df = pd.concat([ats_with_sensor_df, sls_with_sensor_df])
# get the global annual FRP values
global_frp = combined_with_sensor_df.groupby(['sensor', 'year'], as_index=False).agg({'cloud_adjusted_mean_frp': np.sum})
global_frp.cloud_adjusted_mean_frp /= 1000 # convert from MW to GW
global_frp = global_frp.pivot('year', 'sensor', 'cloud_adjusted_mean_frp')
global_frp.columns.name = None
global_frp.rename(columns={'at1': 'AT1', 'at2':'AT2', 'ats':'ATS', 'sls':'SLS'}, inplace=True)
global_frp = global_frp[['AT1', 'AT1 & AT2', 'AT2', 'AT2 & ATS', 'ATS', 'SLS']]
# now aggregate to remove sensor information from screened data
to_group = ['countries', 'year']
to_agg = {'cloud_adjusted_mean_frp': np.sum}
combined_df = combined_with_sensor_df.groupby(to_group, as_index=False).agg(to_agg)
combined_df.cloud_adjusted_mean_frp /= 1000
# find the top n flaring countries
n_countries = 30
top_flaring_countries = combined_df.groupby('countries').agg({'cloud_adjusted_mean_frp': np.sum}).sort_values('cloud_adjusted_mean_frp', ascending=False).head(n_countries).reset_index()
# reduce to the top countries
top_flaring_activity = combined_df.merge(top_flaring_countries[['countries']], on=['countries'])
# pivoted
pivoted_df = top_flaring_activity.pivot('year', 'countries', 'cloud_adjusted_mean_frp')
df_list = []
for c in top_flaring_countries.countries.values:
df_list.append(pivoted_df[c])
top_flaring_activity = pd.concat(df_list, axis=1)
In [415]:
global_frp_plot = combined_with_sensor_df.groupby(['year'], as_index=True).agg({'cloud_adjusted_mean_frp': np.sum})
global_frp_plot.cloud_adjusted_mean_frp /= 1000 # convert from MW to GW
pc_plot_frp = pd.DataFrame(top_flaring_activity.values / global_frp_plot.values * 100,
columns=top_flaring_activity.T.index,
index=top_flaring_activity.index)
In [416]:
# sort out diff plot showing only years with reasonable sampling
diff_plot = pc_plot_frp.copy(deep=True)
diff_plot.drop([1991,1992,2012], axis=0, inplace=True)
diff_plot = diff_plot - diff_plot.shift()
diff_plot.loc[1991] = np.nan
diff_plot.loc[1992] = np.nan
diff_plot.loc[2012] = np.nan
diff_plot.sort_index(inplace=True)
In [417]:
plt.close('all')
#fig, ax = plt.figure(figsize=(18,7))
fig, (ax0, ax1, ax2) = plt.subplots(nrows=3, ncols=1,
gridspec_kw={'height_ratios': [2, 7.5, 7.5]},
figsize=(15, 22))
sns.heatmap(global_frp.T, linewidths=1,
robust=True, annot=True,
annot_kws={'fontsize':13},
fmt='.1f',
cbar=False,
cmap='Greys_r',
vmax=18,
ax=ax0)
ax0.set(xlabel='', ylabel='a) Global FRP (GW)')
ax0.yaxis.label.set_size(20)
ax0.set_xticklabels(ax0.get_xticklabels(), fontsize=12)
ax0.set_yticklabels(ax0.get_yticklabels(), fontsize=12, rotation=0)
sns.heatmap(pc_plot_frp.T, linewidths=1,
robust=True, annot=True,
annot_kws={'fontsize':13},
fmt='.1f',
cbar=False,
cmap='viridis',
vmax = 18,
ax=ax1)
ax1.set(xlabel='', ylabel='b) Fraction of Global FRP (%)')
ax1.yaxis.label.set_size(20)
ax1.set_yticklabels(ax1.get_yticklabels(), fontsize=12)
sns.heatmap(diff_plot.T, linewidths=1,
robust=True, annot=True,
annot_kws={'fontsize':13},
fmt='.1f',
cbar=False,
cmap='coolwarm',
vmax=4, vmin=-4,
ax=ax2)
ax2.set(xlabel='Year', ylabel='c) Annual Change in Fraction of Global FRP (%)')
ax2.yaxis.label.set_size(20)
ax2.xaxis.label.set_size(20)
ax2.set_xticklabels(ax2.get_xticklabels(), fontsize=12)
ax2.set_yticklabels(ax2.get_yticklabels(), fontsize=12)
plt.savefig(os.path.join(output_path, 'flare_frp_analysis.png'), bbox_inches='tight', dpi=600)
plt.show()
In [750]:
pc_plot_frp.sum(axis=1)
Out[750]:
In [418]:
frp_global_pc_change = combined_with_sensor_df.groupby(['sensor', 'year'], as_index=False).agg({'cloud_adjusted_mean_frp': np.sum})
frp_global_pc_change = frp_global_pc_change.sort_values('year')
frp_global_pc_change['pc_change'] = (frp_global_pc_change.cloud_adjusted_mean_frp / frp_global_pc_change.cloud_adjusted_mean_frp.shift() - 1)
frp_global_pc_change.drop('cloud_adjusted_mean_frp', axis=1, inplace=True)
frp_global_pc_change = frp_global_pc_change.pivot('year', 'sensor', 'pc_change')
frp_global_pc_change.rename(columns={'at1': 'AT1', 'at2':'AT2', 'ats':'ATS', 'sls':'SLS'}, inplace=True)
frp_global_pc_change = frp_global_pc_change[['AT1', 'AT1 & AT2', 'AT2', 'AT2 & ATS', 'ATS', 'SLS']]
frp_country_pc_change = (top_flaring_activity / top_flaring_activity.shift() - 1)
for yr in [1992]:
frp_global_pc_change = frp_global_pc_change[frp_global_pc_change.index != yr]
frp_country_pc_change = frp_country_pc_change[frp_country_pc_change.index != yr]
In [419]:
#frp_global_pc_change = frp_global_pc_change[frp_global_pc_change.index >= 2003]
#frp_global_pc_change = frp_global_pc_change.T[(frp_global_pc_change.T.index == u'ATS') |
# (frp_global_pc_change.T.index == u'SLS')].T
#frp_country_pc_change = frp_country_pc_change[frp_country_pc_change.index >= 2003]
In [420]:
plt.close('all')
#fig, ax = plt.figure(figsize=(18,7))
fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1,
gridspec_kw={'height_ratios': [2, 7.5]},
figsize=(15, 12))
sns.heatmap(frp_global_pc_change.T*100, linewidths=1,
robust=True, annot=True,
fmt='.0f', annot_kws={'fontsize':13},
cbar=False,
cmap='coolwarm',
vmax=10, vmin=-10,
ax=ax0)
ax0.set(xlabel='', ylabel='Global')
ax0.yaxis.label.set_size(20)
ax0.set_xticklabels(ax0.get_xticklabels(), fontsize=12)
ax0.set_yticklabels(ax0.get_yticklabels(), fontsize=12, rotation=0)
sns.heatmap(frp_country_pc_change.T*100, linewidths=1,
robust=True, annot=True,
fmt='.0f', annot_kws={'fontsize':13},
cbar=False,
cmap='coolwarm',
vmax=50, vmin=-50,
ax=ax1)
ax1.set(xlabel='Year', ylabel='Country')
ax1.yaxis.label.set_size(20)
ax1.xaxis.label.set_size(20)
ax1.set_xticklabels(ax1.get_xticklabels(), fontsize=12)
ax1.set_yticklabels(ax1.get_yticklabels(), fontsize=12)
plt.savefig(os.path.join(output_path, 'ats_frp_change.png'), bbox_inches='tight', dpi=600)
plt.show()
In [421]:
ats_with_sensor_df.head()
Out[421]:
In [422]:
counts = global_activity.sum(axis=1)
frp = global_frp.sum(axis=1)
In [423]:
plt.close('all')
plt.plot(counts.index, frp.values / counts.values, 'r.')
plt.show()
In [424]:
ats_flare_df_with_countries = ats_flare_df.merge(ats_countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
In [425]:
sls_flare_df_with_countries = sls_flare_df.merge(sls_countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
In [426]:
ats_flare_df_uk_1994 = ats_flare_df_with_countries[(ats_flare_df_with_countries.countries == 'United Kingdom') &
(ats_flare_df_with_countries.year == 1994)]
In [427]:
ats_flare_df_uk_1994[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(20)
Out[427]:
In [428]:
ats_flare_df_malaysia_2011 = ats_flare_df_with_countries[(ats_flare_df_with_countries.countries == 'Malaysia') &
(ats_flare_df_with_countries.year == 2011)]
In [429]:
ats_flare_df_malaysia_2011[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(20)
Out[429]:
In [430]:
ats_flare_df_indonesia_2011 = ats_flare_df_with_countries[(ats_flare_df_with_countries.countries == 'Indonesia') &
(ats_flare_df_with_countries.year == 2011)]
In [431]:
ats_flare_df_indonesia_2011[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(20)
Out[431]:
In [432]:
ats_flare_df_russia_2002 = ats_flare_df_with_countries[(ats_flare_df_with_countries.countries == 'Russia') &
(ats_flare_df_with_countries.year == 2002)]
In [433]:
ats_flare_df_russia_2002[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(20)
Out[433]:
In [434]:
sls_flare_df_venezuela = sls_flare_df_with_countries[(sls_flare_df_with_countries.countries == 'Venezuela')]
sls_flare_df_venezuela[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(50)
Out[434]:
In [435]:
sls_flare_df_us = sls_flare_df_with_countries[(sls_flare_df_with_countries.countries == 'United States')]
sls_flare_df_us[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(20)
Out[435]:
In [436]:
sls_flare_df_iraq = sls_flare_df_with_countries[(sls_flare_df_with_countries.countries == 'Iraq')]
sls_flare_df_iraq[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(20)
Out[436]:
In [437]:
sls_flare_df_iran = sls_flare_df_with_countries[(sls_flare_df_with_countries.countries == 'Iran')]
sls_flare_df_iran[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(20)
Out[437]:
This map shows each flare's emissions over the entire ATSR time series. We use the screened data (from the screening process above), and the FRP values are the cloud-adjusted ones, so the persistence and typical cloud cover of each flare are taken into account when computing the average.
This is currently showing the mean over the lifetime. Change it back to the mean over each year, multiply by the time in the year, and then group over years to get flare activity (a minimal sketch of this change is given below, after the per-year FRP cell).
In [438]:
def myround(x, base=0.5):
return base * np.round(x/base)
def plot_total_frp(frp, binned_stat, top_flares):
plt.close('all')
fig = plt.figure(figsize=(20,9))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_xlim(( -180, 180))
ax.set_ylim((-90, 90))
ax.coastlines(color='white', linewidth=0.75)
img_extent = (-180, 180, -90, 90)  # define the map extent before its first use
ax.imshow(np.tile(np.array([[[0,0,0]]],
dtype=np.uint8), [2, 2, 1]),
origin='upper',
transform=ccrs.PlateCarree(),
extent=img_extent)
#ax.text(0.94, 0.92, lab, transform=ax.transAxes, fontsize=25, color='w')
scat = ax.scatter(frp_df.lons, frp_df.lats,
s=0.05,
color='cyan',
alpha=0.2,
label='Flare Locations')
scat2 = ax.scatter(top_flares.lons, top_flares.lats,
s=55,
facecolor='chartreuse',
edgecolor='k',
alpha=1,
label='10% Flare Locations')
leg0 = ax.legend(loc = 3, scatterpoints = 1, prop={'size': 15,})
leg0.get_frame().set_facecolor('k')
leg0.legendHandles[0]._sizes = [40]
leg0.legendHandles[0].set_alpha(1)
leg0.legendHandles[1]._sizes = [110]
leg0.legendHandles[1].set_alpha(1)
plt.setp(leg0.get_texts(), color='w')
img_extent = (-180, 180, -90, 90)
im = ax.imshow(binned_stat, origin='upper', cmap='inferno', norm=LogNorm(vmin=10, vmax=1000), alpha=1,
extent=img_extent, transform=ccrs.PlateCarree())
cbar = plt.colorbar(mappable=im).set_label('Log BCM', fontsize=20)
#-------------------
# SUBPLOTS
#-------------------
# set up the subaxes and locations
sub_x_extent = [(46, 53), (5,25), (4.5, 13.5)]
sub_y_extent = [(25, 33), (27,33), (-9, 7)]
rects = [[0.43, 0.2, 0.3, 0.25], [0.27, 0.68, 0.15, 0.2], [-0.12, 0.27, 0.6, 0.4]]
locs = [(1,2), (3,4), (1,4)]
# add in zoomed plots for rois
for sub_x, sub_y, rect, loc in zip(sub_x_extent, sub_y_extent, rects, locs):
sub_ax = fig.add_axes(rect,
projection=ccrs.PlateCarree(), )
sub_ax.set_xlim(sub_x)
sub_ax.set_ylim(sub_y)
sub_ax.outline_patch.set_edgecolor('white')
# show inset loc
mark_inset(ax, sub_ax, loc1=loc[0], loc2=loc[1], fc="none", lw=1, ec='w')
# add background
sub_ax.coastlines(color='white', linewidth=0.75)
# country_boundaries = feature.NaturalEarthFeature(category='cultural',
# name='admin_0_countries',
# scale='10m', facecolor='none')
# sub_ax.add_feature(country_boundaries, edgecolor='white')
sub_ax.imshow(np.tile(np.array([[[0,0,0]]],
dtype=np.uint8), [2, 2, 1]),
origin='upper',
transform=ccrs.PlateCarree(),
extent=sub_x + sub_y)
sub_scat = sub_ax.scatter(frp_df.lons, frp_df.lats,
s=0.05,
color='cyan',
alpha=0.2)
sub_scat2 = sub_ax.scatter(top_flares.lons, top_flares.lats,
s=55,
facecolor='chartreuse',
edgecolor='k',
alpha=1)
#plt.savefig(os.path.join(output_path, 'flare_map.png'), bbox_inches='tight', dpi=600)
plt.show()
In [439]:
# compute the annual mean for each flare
to_group = ['lats_arcmin', 'lons_arcmin', 'year']
to_agg = {'frp': np.sum,
'sample_counts_flare': np.sum,
'lats': np.mean,
'lons': np.mean}
summed_flare_df = ats_flare_df.groupby(to_group, as_index=False).agg(to_agg)
# get the cloud cover adjusted sampling
to_group = ['lats_arcmin', 'lons_arcmin', 'year']
to_agg = {'cloud_cover': np.mean,
'sample_counts_all': np.sum}
summed_sample_df = ats_flare_sampling_df.groupby(to_group, as_index=False).agg(to_agg)
# merge and get cloud-adjusted sampling (i.e. the expected number of observations)
frp_df = pd.merge(summed_flare_df, summed_sample_df, on=to_group)
frp_df['cloud_adjusted_sample_counts'] = frp_df.sample_counts_all * (1-frp_df.cloud_cover)
# if expected less than actual, then set to actual number of observations
mask = frp_df['cloud_adjusted_sample_counts'] < frp_df['sample_counts_flare']
frp_df.loc[mask, 'cloud_adjusted_sample_counts'] = frp_df.loc[mask, 'sample_counts_flare']
# compute mean frp
frp_df['cloud_adjusted_mean_frp'] = frp_df.frp / frp_df.cloud_adjusted_sample_counts
# get yearly sums
to_group = ['lats_arcmin', 'lons_arcmin']
to_agg = {'cloud_adjusted_mean_frp': np.sum, 'lats': np.mean, 'lons': np.mean}
annual_grouped_df = frp_df.groupby(to_group, as_index=False).agg(to_agg)
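As a sketch of the change suggested above (converting each flare-year to FRE before summing over years); the seconds-per-year constant matches the one used earlier, while the new variable names are assumptions for illustration:
In [ ]:
# sketch: per-flare FRE summed over years, rather than summing the cloud-adjusted mean FRP
seconds_per_year = 3.154e+7
frp_df['yearly_fre'] = frp_df.cloud_adjusted_mean_frp * seconds_per_year / 1000000  # as in the monthly FRE cells above
fre_grouped_df = frp_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg(
    {'yearly_fre': np.sum, 'lats': np.mean, 'lons': np.mean})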
In [440]:
annual_grouped_df['frp_pc_total'] = annual_grouped_df.cloud_adjusted_mean_frp / annual_grouped_df.cloud_adjusted_mean_frp.sum()
In [441]:
sorted_annual_grouped_df = annual_grouped_df.sort_values('frp_pc_total', ascending=False)
sorted_annual_grouped_df['frp_pc_cumsum'] = sorted_annual_grouped_df.frp_pc_total.cumsum()
sorted_annual_grouped_df.reset_index(inplace=True, drop=True)
In [790]:
annual_grouped_df[(annual_grouped_df.lats_arcmin==3140) & (annual_grouped_df.lons_arcmin==604)]
Out[790]:
In [792]:
sorted_annual_grouped_df.head(20)
Out[792]:
In [443]:
n_flares_10_pc = np.sum(sorted_annual_grouped_df.frp_pc_cumsum<0.1)
print n_flares_10_pc
n_flares_50_pc = np.sum(sorted_annual_grouped_df.frp_pc_cumsum<0.5)
print n_flares_50_pc
n_flares_90_pc = np.sum(sorted_annual_grouped_df.frp_pc_cumsum<0.9)
print n_flares_90_pc
In [444]:
plt.close('all')
plt.figure(figsize=(9,9))
plt.plot(np.arange(sorted_annual_grouped_df.shape[0]), sorted_annual_grouped_df.frp_pc_total.cumsum(), 'k--',
linewidth=1, label='Cumulative Percentage')
plt.plot([n_flares_10_pc, n_flares_10_pc], [-0.1, 0.1], 'k-', linewidth=0.5, label='10% (88 Flares)')
plt.plot([-1000, n_flares_10_pc], [0.1, 0.1], 'k-', linewidth=1)
plt.plot([n_flares_50_pc, n_flares_50_pc], [-0.1, 0.5], 'r-', linewidth=0.5, label='50% (1311 Flares)')
plt.plot([-1000, n_flares_50_pc], [0.5, 0.5], 'r-', linewidth=1)
plt.plot([n_flares_90_pc, n_flares_90_pc], [-0.1, 0.9], 'b-', linewidth=0.5, label='90% (7868 Flares)')
plt.plot([-1000, n_flares_90_pc], [0.9, 0.9], 'b-', linewidth=1)
plt.legend(loc=4,prop={'size': 12,})
plt.ylim(-0, 1)
plt.xlim(-750, 21000)
plt.xlabel('Flare Count', fontsize=14)
plt.ylabel('CDF', fontsize=14)
plt.savefig(os.path.join(output_path, 'ats_cumulative_frp.png'), bbox_inches='tight', dpi=600)
plt.show()
In [445]:
bin_x = np.arange(-180, 185, 5)
bin_y = np.arange(-90, 95, 5)
bcm = 0.01*annual_grouped_df.cloud_adjusted_mean_frp + 0.52  # convert cloud-adjusted mean FRP to an estimated flared gas volume (BCM) via a linear relationship
binned_data = stats.binned_statistic_2d(annual_grouped_df.lons, annual_grouped_df.lats,
bcm, 'sum',
bins=[bin_x, bin_y])
binned_stat = np.ma.masked_array(binned_data.statistic, binned_data.statistic==0)
In [910]:
def hist_count(df):
bin_x = np.arange(-180, 181, 1)
bin_y = np.arange(-90, 91, 1)
meshed_x, meshed_y = np.meshgrid(bin_x, bin_y)
meshed_x = meshed_x[:-1,:-1] + 0.5
meshed_y = meshed_y[:-1,:-1] + 0.5
binned_data = stats.binned_statistic_2d(df.lons, df.lats,
np.ones(df.lons.size), 'sum',
bins=[bin_x, bin_y]).statistic.T
mask = binned_data == 0
Z = np.ma.masked_array(binned_data, mask)
return meshed_x, meshed_y, Z
def plot_bcm_totals(flare_df, cum_df):
# set up figure
plt.close('all')
fig = plt.figure(figsize=(20,9))
fig.subplots_adjust(hspace=0.001, wspace=0.1)
ax = plt.axes(projection=ccrs.EqualEarth())
ax.set_global()
ax.outline_patch.set_edgecolor('white')
ax.coastlines(color='white', linewidth=0.75)
ax.gridlines(color='white', linewidth=0.75, alpha=0.5)
img_extent = (-180, 180, -180, 180)
ax.imshow(np.tile(np.array([[[0,0,0]]],
dtype=np.uint8), [2, 2, 1]),
origin='upper',
transform=ccrs.PlateCarree(),
extent=img_extent)
X, Y, Z = hist_count(flare_df)
# levels= np.linspace(0, 2, 21)
# cmap = plt.get_cmap('inferno')
# norm = BoundaryNorm(levels, ncolors=cmap.N, clip=True)
# p = ax.pcolormesh(X, Y, Z, cmap=cmap, transform=ccrs.PlateCarree())
img_extent = (-180, 180, -90, 90)
im = ax.imshow(Z, origin='lower', cmap='inferno', norm=LogNorm(vmin=1, vmax=500),
extent=img_extent, transform=ccrs.PlateCarree())
cbar = plt.colorbar(mappable=im, ax=ax).set_label('Log BCM', fontsize=20)
plt.savefig(os.path.join(output_path, 'flare_map.png'), bbox_inches='tight', dpi=600)
plt.show()
In [911]:
plot_bcm_totals(annual_grouped_df, sorted_annual_grouped_df)
In [496]:
def plot_data_totals(df):
# set up figure
plt.close('all')
fig = plt.figure(figsize=(15,20))
ax1 = plt.subplot(1, 1, 1, projection=ccrs.EqualEarth())
# split data into top 10 pc and other 90
max_frp_pc = df.frp_pc_total.max()
top_10pc = df[df.frp_pc_cumsum<=0.1]
bottom_90pc= df[df.frp_pc_cumsum>0.1]
ax_list = [ax1]
for i, ax in enumerate(ax_list):
ax.set_global()
ax.outline_patch.set_edgecolor('black')
ax.coastlines(color='black', linewidth=0.75)
ax.gridlines(color='black', linewidth=0.75, alpha=0.5)
# make main plot
#ax_list[0].text(0.94, 0.92, lab, transform=ax_list[0].transAxes, fontsize=25)
scat = ax_list[0].scatter(top_10pc.lons, top_10pc.lats,
s=(top_10pc.frp_pc_total / max_frp_pc * 10)**3,
edgecolors='k',
facecolor='r',
transform=ccrs.PlateCarree())
scat = ax_list[0].scatter(bottom_90pc.lons, bottom_90pc.lats,
s=(bottom_90pc.frp_pc_total / max_frp_pc * 10)**3,
facecolor='b',
transform=ccrs.PlateCarree())
plt.show()
In [497]:
plot_data_totals(sorted_annual_grouped_df)
In [795]:
sorted_annual_grouped_df.head()
Out[795]:
In [799]:
sorted_annual_grouped_df.cloud_adjusted_mean_frp.sum() * 0.095
Out[799]:
In [ ]:
bcm = 0.01 * global_frp.sum(axis=1) * 1000 + 0.52  # convert global FRP back from GW to MW, then apply the linear FRP-to-BCM relationship
plt.close('all')
plt.bar(bcm.index, bcm.values)
plt.show()
In [ ]:
bcm
In [ ]:
global_frp.tail()
Notes:
ERS-2 gyro failure in January 2001 (months since the gyro failure that are currently available: July to December 2001, July to August 2002, January to June 2003) http://earth.esa.int/workshops/meris_aatsr2008/participants/616/pres_616_Veal.pdf
ATSR-1 launched on ERS-1 in July 1991.
TODO:
In [ ]: